Full Code of vosen/ZLUDA for AI

master dcc6bb8fdad2 cached
871 files
17.8 MB
4.1M tokens
1 requests
Copy disabled (too large) Download .txt
Showing preview only (16,366K chars total). Download the full file to get everything.
Repository: vosen/ZLUDA
Branch: master
Commit: dcc6bb8fdad2
Files: 871
Total size: 17.8 MB

Directory structure:
gitextract_ehz427hu/

├── .cargo/
│   └── config.toml
├── .devcontainer/
│   ├── Dockerfile
│   └── devcontainer.json
├── .git-blame-ignore-revs
├── .gitattributes
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── config.yml
│   │   └── zluda_dump.yml
│   └── workflows/
│       ├── move_tests.sh
│       ├── nightly_tests.yml
│       ├── pr_master.yml
│       ├── push_master.yml
│       ├── rocm_setup_build.sh
│       ├── rocm_setup_run.sh
│       └── trigger_nightly_tests.yml
├── .gitignore
├── .gitmodules
├── .rustfmt.toml
├── Cargo.toml
├── LICENSE-APACHE
├── LICENSE-MIT
├── README.md
├── compiler/
│   ├── Cargo.toml
│   └── src/
│       ├── error.rs
│       └── main.rs
├── cuda_check/
│   ├── Cargo.toml
│   └── src/
│       ├── main.rs
│       └── win.rs
├── cuda_macros/
│   ├── .rustfmt.toml
│   ├── Cargo.toml
│   ├── build/
│   │   └── wrapper.h
│   └── src/
│       ├── cublas.rs
│       ├── cublaslt.rs
│       ├── cublaslt_internal.rs
│       ├── cuda.rs
│       ├── cudnn8.rs
│       ├── cudnn9.rs
│       ├── cufft.rs
│       ├── cusparse.rs
│       ├── lib.rs
│       └── nvml.rs
├── cuda_types/
│   ├── .rustfmt.toml
│   ├── Cargo.toml
│   └── src/
│       ├── cublas.rs
│       ├── cublaslt.rs
│       ├── cuda.rs
│       ├── cudnn.rs
│       ├── cudnn8.rs
│       ├── cudnn9.rs
│       ├── cufft.rs
│       ├── cusparse.rs
│       ├── dark_api.rs
│       ├── lib.rs
│       └── nvml.rs
├── dark_api/
│   ├── Cargo.toml
│   └── src/
│       ├── fatbin.rs
│       └── lib.rs
├── detours-sys/
│   ├── Cargo.toml
│   ├── LICENSE-APACHE
│   ├── LICENSE-MIT
│   ├── README.md
│   ├── build/
│   │   └── wrapper.h
│   ├── build.rs
│   └── src/
│       ├── bundled_bindings.rs
│       └── lib.rs
├── docs/
│   ├── .gitignore
│   ├── .readthedocs.yaml
│   ├── book.toml
│   └── src/
│       ├── SUMMARY.md
│       ├── building.md
│       ├── faq.md
│       ├── hip_sdk.md
│       ├── llama_cpp.md
│       ├── precompiling.md
│       ├── quick_start.md
│       └── troubleshooting.md
├── ext/
│   ├── detours/
│   │   ├── .github/
│   │   │   ├── ISSUE_TEMPLATE/
│   │   │   │   ├── bug-report.md
│   │   │   │   └── question.md
│   │   │   ├── PULL_REQUEST_TEMPLATE/
│   │   │   │   └── pull_request_template.md
│   │   │   ├── codeql/
│   │   │   │   └── codeql-config.yml
│   │   │   └── workflows/
│   │   │       └── main.yml
│   │   ├── .gitignore
│   │   ├── CREDITS.TXT
│   │   ├── LICENSE.md
│   │   ├── Makefile
│   │   ├── README.md
│   │   ├── samples/
│   │   │   ├── Makefile
│   │   │   ├── README.TXT
│   │   │   ├── comeasy/
│   │   │   │   ├── Makefile
│   │   │   │   ├── comeasy.cpp
│   │   │   │   ├── wrotei.cpp
│   │   │   │   └── wrotei.rc
│   │   │   ├── commem/
│   │   │   │   ├── Makefile
│   │   │   │   └── commem.cpp
│   │   │   ├── common.mak
│   │   │   ├── cping/
│   │   │   │   ├── Makefile
│   │   │   │   ├── ReadMe.Txt
│   │   │   │   ├── cping.cpp
│   │   │   │   └── iping.idl
│   │   │   ├── disas/
│   │   │   │   ├── Makefile
│   │   │   │   ├── arm.asm
│   │   │   │   ├── disas.cpp
│   │   │   │   ├── ia64.asm
│   │   │   │   ├── unk.cpp
│   │   │   │   ├── x64.asm
│   │   │   │   └── x86.cpp
│   │   │   ├── dtest/
│   │   │   │   ├── Makefile
│   │   │   │   ├── NORMAL_IA64.TXT
│   │   │   │   ├── NORMAL_X64.TXT
│   │   │   │   ├── NORMAL_X86.TXT
│   │   │   │   ├── dtarge.cpp
│   │   │   │   ├── dtarge.h
│   │   │   │   ├── dtarge.rc
│   │   │   │   └── dtest.cpp
│   │   │   ├── dumpe/
│   │   │   │   ├── Makefile
│   │   │   │   └── dumpe.cpp
│   │   │   ├── dumpi/
│   │   │   │   ├── Makefile
│   │   │   │   └── dumpi.cpp
│   │   │   ├── dynamic_alloc/
│   │   │   │   ├── Makefile
│   │   │   │   ├── main.cpp
│   │   │   │   ├── x64.asm
│   │   │   │   └── x86.asm
│   │   │   ├── echo/
│   │   │   │   ├── Makefile
│   │   │   │   ├── echofx.cpp
│   │   │   │   ├── echofx.rc
│   │   │   │   ├── echonul.cpp
│   │   │   │   └── main.cpp
│   │   │   ├── einst/
│   │   │   │   ├── Makefile
│   │   │   │   ├── edll1x.cpp
│   │   │   │   ├── edll2x.cpp
│   │   │   │   ├── edll3x.cpp
│   │   │   │   └── einst.cpp
│   │   │   ├── excep/
│   │   │   │   ├── Makefile
│   │   │   │   ├── excep.cpp
│   │   │   │   ├── firstexc.cpp
│   │   │   │   └── firstexc.h
│   │   │   ├── findfunc/
│   │   │   │   ├── Makefile
│   │   │   │   ├── extend.cpp
│   │   │   │   ├── extend.rc
│   │   │   │   ├── findfunc.cpp
│   │   │   │   ├── symtest.cpp
│   │   │   │   ├── target.cpp
│   │   │   │   ├── target.h
│   │   │   │   └── target.rc
│   │   │   ├── impmunge/
│   │   │   │   ├── Makefile
│   │   │   │   └── impmunge.cpp
│   │   │   ├── member/
│   │   │   │   ├── Makefile
│   │   │   │   └── member.cpp
│   │   │   ├── opengl/
│   │   │   │   ├── Makefile
│   │   │   │   ├── ogldet.cpp
│   │   │   │   ├── ogldet.rc
│   │   │   │   └── testogl.cpp
│   │   │   ├── region/
│   │   │   │   ├── Makefile
│   │   │   │   └── region.cpp
│   │   │   ├── setdll/
│   │   │   │   ├── Makefile
│   │   │   │   └── setdll.cpp
│   │   │   ├── simple/
│   │   │   │   ├── Makefile
│   │   │   │   ├── simple.cpp
│   │   │   │   ├── simple.rc
│   │   │   │   └── sleep5.cpp
│   │   │   ├── slept/
│   │   │   │   ├── Makefile
│   │   │   │   ├── NORMAL_IA64.TXT
│   │   │   │   ├── NORMAL_X64.TXT
│   │   │   │   ├── NORMAL_X86.TXT
│   │   │   │   ├── dslept.cpp
│   │   │   │   ├── dslept.rc
│   │   │   │   ├── sleepbed.cpp
│   │   │   │   ├── sleepnew.cpp
│   │   │   │   ├── sleepold.cpp
│   │   │   │   ├── slept.cpp
│   │   │   │   ├── slept.h
│   │   │   │   ├── slept.rc
│   │   │   │   └── verify.cpp
│   │   │   ├── syelog/
│   │   │   │   ├── Makefile
│   │   │   │   ├── sltest.cpp
│   │   │   │   ├── sltestp.cpp
│   │   │   │   ├── syelog.cpp
│   │   │   │   ├── syelog.h
│   │   │   │   └── syelogd.cpp
│   │   │   ├── talloc/
│   │   │   │   ├── Makefile
│   │   │   │   ├── NORMAL_IA64.TXT
│   │   │   │   ├── NORMAL_X64.TXT
│   │   │   │   ├── talloc.cpp
│   │   │   │   ├── tdll1x.cpp
│   │   │   │   ├── tdll2x.cpp
│   │   │   │   ├── tdll3x.cpp
│   │   │   │   ├── tdll4x.cpp
│   │   │   │   ├── tdll5x.cpp
│   │   │   │   ├── tdll6x.cpp
│   │   │   │   ├── tdll7x.cpp
│   │   │   │   ├── tdll8x.cpp
│   │   │   │   └── tdll9x.cpp
│   │   │   ├── traceapi/
│   │   │   │   ├── Makefile
│   │   │   │   ├── _win32.cpp
│   │   │   │   ├── testapi.cpp
│   │   │   │   ├── trcapi.cpp
│   │   │   │   └── trcapi.rc
│   │   │   ├── tracebld/
│   │   │   │   ├── Makefile
│   │   │   │   ├── tracebld.cpp
│   │   │   │   ├── tracebld.h
│   │   │   │   ├── trcbld.cpp
│   │   │   │   └── trcbld.rc
│   │   │   ├── tracelnk/
│   │   │   │   ├── Makefile
│   │   │   │   ├── trclnk.cpp
│   │   │   │   └── trclnk.rc
│   │   │   ├── tracemem/
│   │   │   │   ├── Makefile
│   │   │   │   ├── trcmem.cpp
│   │   │   │   └── trcmem.rc
│   │   │   ├── tracereg/
│   │   │   │   ├── Makefile
│   │   │   │   ├── trcreg.cpp
│   │   │   │   └── trcreg.rc
│   │   │   ├── traceser/
│   │   │   │   ├── Makefile
│   │   │   │   ├── trcser.cpp
│   │   │   │   └── trcser.rc
│   │   │   ├── tracessl/
│   │   │   │   ├── Makefile
│   │   │   │   ├── trcssl.cpp
│   │   │   │   └── trcssl.rc
│   │   │   ├── tracetcp/
│   │   │   │   ├── Makefile
│   │   │   │   ├── trctcp.cpp
│   │   │   │   └── trctcp.rc
│   │   │   ├── tryman/
│   │   │   │   ├── Makefile
│   │   │   │   ├── managed.cs
│   │   │   │   ├── size.cpp
│   │   │   │   ├── tryman.cpp
│   │   │   │   ├── tstman.cpp
│   │   │   │   └── tstman.rc
│   │   │   └── withdll/
│   │   │       ├── Makefile
│   │   │       └── withdll.cpp
│   │   ├── src/
│   │   │   ├── Makefile
│   │   │   ├── creatwth.cpp
│   │   │   ├── detours.cpp
│   │   │   ├── detours.h
│   │   │   ├── detver.h
│   │   │   ├── disasm.cpp
│   │   │   ├── disolarm.cpp
│   │   │   ├── disolarm64.cpp
│   │   │   ├── disolia64.cpp
│   │   │   ├── disolx64.cpp
│   │   │   ├── disolx86.cpp
│   │   │   ├── image.cpp
│   │   │   ├── modules.cpp
│   │   │   └── uimports.cpp
│   │   ├── system.mak
│   │   ├── tests/
│   │   │   ├── Makefile
│   │   │   ├── catch.hpp
│   │   │   ├── corruptor.cpp
│   │   │   ├── corruptor.h
│   │   │   ├── main.cpp
│   │   │   ├── test_image_api.cpp
│   │   │   └── test_module_api.cpp
│   │   └── vc/
│   │       ├── Detours.sln
│   │       ├── Detours.vcxproj
│   │       └── Detours.vcxproj.filters
│   ├── highs-sys/
│   │   ├── Cargo.toml
│   │   ├── README.md
│   │   ├── build.rs
│   │   ├── install-dependencies.sh
│   │   ├── src/
│   │   │   ├── c_bindings.rs
│   │   │   └── lib.rs
│   │   ├── tests/
│   │   │   ├── test_highs_call.rs
│   │   │   └── test_highs_functions.rs
│   │   └── wrapper.h
│   ├── hip_runtime-sys/
│   │   ├── .rustfmt.toml
│   │   ├── Cargo.toml
│   │   ├── build.rs
│   │   └── src/
│   │       └── lib.rs
│   ├── hipblaslt-sys/
│   │   ├── .rustfmt.toml
│   │   ├── Cargo.toml
│   │   ├── build.rs
│   │   └── src/
│   │       └── lib.rs
│   ├── miopen-sys/
│   │   ├── .rustfmt.toml
│   │   ├── Cargo.toml
│   │   ├── build.rs
│   │   └── src/
│   │       └── lib.rs
│   ├── rocblas-sys/
│   │   ├── .rustfmt.toml
│   │   ├── Cargo.toml
│   │   ├── build.rs
│   │   └── src/
│   │       └── lib.rs
│   ├── rocm_smi-sys/
│   │   ├── .rustfmt.toml
│   │   ├── Cargo.toml
│   │   ├── build.rs
│   │   └── src/
│   │       └── lib.rs
│   └── rocsparse-sys/
│       ├── .rustfmt.toml
│       ├── Cargo.toml
│       ├── build.rs
│       └── src/
│           └── lib.rs
├── format/
│   ├── .rustfmt.toml
│   ├── Cargo.toml
│   └── src/
│       ├── dark_api.rs
│       ├── dnn8.rs
│       ├── dnn9.rs
│       ├── format_generated.rs
│       ├── format_generated_blas.rs
│       ├── format_generated_blaslt.rs
│       ├── format_generated_blaslt_internal.rs
│       ├── format_generated_dnn8.rs
│       ├── format_generated_dnn9.rs
│       ├── format_generated_fft.rs
│       ├── format_generated_nvml.rs
│       ├── format_generated_sparse.rs
│       └── lib.rs
├── llvm_zluda/
│   ├── Cargo.toml
│   ├── build.rs
│   └── src/
│       ├── compile.rs
│       ├── device-libs/
│       │   ├── LICENSE.TXT
│       │   ├── README.md
│       │   ├── ockl.bc
│       │   └── ocml.bc
│       ├── ffi.rs
│       ├── lib.cpp
│       ├── lib.rs
│       └── utils.rs
├── ptx/
│   ├── Cargo.toml
│   ├── lib/
│   │   ├── zluda_ptx_impl.bc
│   │   └── zluda_ptx_impl.cpp
│   └── src/
│       ├── lib.rs
│       ├── pass/
│       │   ├── deparamize_functions.rs
│       │   ├── expand_operands.rs
│       │   ├── fix_special_registers.rs
│       │   ├── hoist_globals.rs
│       │   ├── insert_explicit_load_store.rs
│       │   ├── insert_implicit_conversions.rs
│       │   ├── insert_post_saturation.rs
│       │   ├── instruction_mode_to_global_mode/
│       │   │   ├── call_with_mode.ptx
│       │   │   ├── fold_denormal.ptx
│       │   │   ├── mod.rs
│       │   │   └── test.rs
│       │   ├── llvm/
│       │   │   ├── attributes.rs
│       │   │   ├── emit.rs
│       │   │   └── mod.rs
│       │   ├── mod.rs
│       │   ├── normalize_basic_blocks.rs
│       │   ├── normalize_identifiers.rs
│       │   ├── normalize_predicates.rs
│       │   ├── remove_unreachable_basic_blocks.rs
│       │   ├── replace_instructions_with_functions.rs
│       │   ├── replace_instructions_with_functions_fp_required.rs
│       │   ├── replace_known_functions.rs
│       │   ├── resolve_function_pointers.rs
│       │   └── test/
│       │       ├── expand_operands/
│       │       │   ├── immediate_conversion.ptx
│       │       │   ├── immediates.ptx
│       │       │   ├── mod.rs
│       │       │   ├── vector_extract.ptx
│       │       │   ├── vector_operand.ptx
│       │       │   └── vector_operand_convert.ptx
│       │       ├── insert_implicit_conversions/
│       │       │   ├── default.ptx
│       │       │   ├── default_reg_b32_reg_f16x2.ptx
│       │       │   ├── default_reg_b32_reg_v2_b16.ptx
│       │       │   ├── default_relaxed.ptx
│       │       │   └── mod.rs
│       │       ├── instruction_mode_to_global_mode/
│       │       │   ├── mod.rs
│       │       │   └── mode_conflict.ptx
│       │       ├── mod.rs
│       │       └── normalize_basic_blocks/
│       │           ├── mod.rs
│       │           └── trap.ptx
│       └── test/
│           ├── _Z9vectorAddPKfS0_Pfi.ptx
│           ├── ll/
│           │   ├── _attributes.ll
│           │   ├── abs.ll
│           │   ├── activemask.ll
│           │   ├── add.ll
│           │   ├── add_extended.ll
│           │   ├── add_ftz.ll
│           │   ├── add_non_coherent.ll
│           │   ├── add_s32_sat.ll
│           │   ├── add_tuning.ll
│           │   ├── addc_cc_s32.ll
│           │   ├── and.ll
│           │   ├── assertfail.ll
│           │   ├── atom_add.ll
│           │   ├── atom_add_float.ll
│           │   ├── atom_cas.ll
│           │   ├── atom_inc.ll
│           │   ├── b64tof64.ll
│           │   ├── bar_red_and_pred.ll
│           │   ├── bench.ll
│           │   ├── bfe.ll
│           │   ├── bfi.ll
│           │   ├── block.ll
│           │   ├── bmsk_clamp_b32.ll
│           │   ├── bra.ll
│           │   ├── brev.ll
│           │   ├── call.ll
│           │   ├── call_rnd.ll
│           │   ├── clz.ll
│           │   ├── const.ll
│           │   ├── const_ident.ll
│           │   ├── constant_f32.ll
│           │   ├── constant_negative.ll
│           │   ├── copysign.ll
│           │   ├── cos.ll
│           │   ├── cp_async.ll
│           │   ├── createpolicy.ll
│           │   ├── cvt_f16x2_f32.ll
│           │   ├── cvt_f64_f32.ll
│           │   ├── cvt_pack.ll
│           │   ├── cvt_relu_f16x2_f32.ll
│           │   ├── cvt_rn_bf16x2_f32.ll
│           │   ├── cvt_rn_f16x2_e4m3x2.ll
│           │   ├── cvt_rn_f16x2_e5m2x2.ll
│           │   ├── cvt_rn_satfinite_e4m3x2_f32.ll
│           │   ├── cvt_rn_satfinite_e5m2x2_f32.ll
│           │   ├── cvt_rni.ll
│           │   ├── cvt_rni_u16_f32.ll
│           │   ├── cvt_rzi.ll
│           │   ├── cvt_s16_s8.ll
│           │   ├── cvt_s32_f32.ll
│           │   ├── cvt_s64_s32.ll
│           │   ├── cvt_sat_s_u.ll
│           │   ├── cvta.ll
│           │   ├── div_approx.ll
│           │   ├── div_ftz.ll
│           │   ├── div_noftz.ll
│           │   ├── dp2a.ll
│           │   ├── dp4a.ll
│           │   ├── ex2.ll
│           │   ├── extern_func.ll
│           │   ├── extern_shared.ll
│           │   ├── extern_shared_call.ll
│           │   ├── fma.ll
│           │   ├── fma_bf16x2.ll
│           │   ├── fma_f16x2.ll
│           │   ├── fmax.ll
│           │   ├── global_array.ll
│           │   ├── global_array_f32.ll
│           │   ├── lanemask_le.ll
│           │   ├── lanemask_lt.ll
│           │   ├── ld_st.ll
│           │   ├── ld_st_implicit.ll
│           │   ├── ld_st_offset.ll
│           │   ├── ldmatrix.ll
│           │   ├── ldmatrix_trans.ll
│           │   ├── lg2.ll
│           │   ├── local_align.ll
│           │   ├── mad_extended.ll
│           │   ├── mad_s32.ll
│           │   ├── mad_wide.ll
│           │   ├── malformed_label.ll
│           │   ├── max.ll
│           │   ├── membar.ll
│           │   ├── min.ll
│           │   ├── min_f16.ll
│           │   ├── min_nan_f16.ll
│           │   ├── mma_m16n8k16_f32_bf16_bf16_f32.ll
│           │   ├── mma_m16n8k16_f32_bf16_bf16_f32_2x.ll
│           │   ├── mma_m16n8k16_f32_f16_f16_f32.ll
│           │   ├── mma_m16n8k32_s32_s8_s8_s32.ll
│           │   ├── mma_m16n8k32_s32_s8_s8_s32_interleave.ll
│           │   ├── mov.ll
│           │   ├── mov_address.ll
│           │   ├── mul24_hi_s32.ll
│           │   ├── mul24_hi_u32.ll
│           │   ├── mul24_lo_s32.ll
│           │   ├── mul24_lo_u32.ll
│           │   ├── mul_ftz.ll
│           │   ├── mul_hi.ll
│           │   ├── mul_lo.ll
│           │   ├── mul_non_ftz.ll
│           │   ├── mul_wide.ll
│           │   ├── multiple_return.ll
│           │   ├── nanosleep.ll
│           │   ├── neg.ll
│           │   ├── non_scalar_ptr_offset.ll
│           │   ├── noreturn.ll
│           │   ├── not.ll
│           │   ├── ntid.ll
│           │   ├── or.ll
│           │   ├── param_is_addressable.ll
│           │   ├── popc.ll
│           │   ├── pred_not.ll
│           │   ├── prmt.ll
│           │   ├── prmt_slow.ll
│           │   ├── rcp.ll
│           │   ├── redux_sync_add_u32_partial.ll
│           │   ├── redux_sync_op_s32.ll
│           │   ├── redux_sync_op_u32.ll
│           │   ├── reg_local.ll
│           │   ├── reg_multi.ll
│           │   ├── rem.ll
│           │   ├── rsqrt.ll
│           │   ├── sad_s64.ll
│           │   ├── selp.ll
│           │   ├── selp_true.ll
│           │   ├── set_f16.ll
│           │   ├── setp.ll
│           │   ├── setp_gt.ll
│           │   ├── setp_leu.ll
│           │   ├── setp_nan.ll
│           │   ├── setp_num.ll
│           │   ├── shared_ptr_32.ll
│           │   ├── shared_ptr_take_address.ll
│           │   ├── shared_unify_extern.ll
│           │   ├── shared_unify_local.ll
│           │   ├── shared_variable.ll
│           │   ├── shf_l.ll
│           │   ├── shf_l_clamp.ll
│           │   ├── shf_l_wrap.ll
│           │   ├── shf_r.ll
│           │   ├── shf_r_clamp.ll
│           │   ├── shf_r_wrap.ll
│           │   ├── shfl_sync_bfly_b32_pred.ll
│           │   ├── shfl_sync_down_b32_pred.ll
│           │   ├── shfl_sync_idx_b32_pred.ll
│           │   ├── shfl_sync_mode_b32.ll
│           │   ├── shfl_sync_up_b32_pred.ll
│           │   ├── shl.ll
│           │   ├── shr.ll
│           │   ├── shr_oob.ll
│           │   ├── sign_extend.ll
│           │   ├── sin.ll
│           │   ├── sqrt.ll
│           │   ├── sqrt_rn_ftz.ll
│           │   ├── stateful_ld_st_ntid.ll
│           │   ├── stateful_ld_st_ntid_chain.ll
│           │   ├── stateful_ld_st_ntid_sub.ll
│           │   ├── stateful_ld_st_simple.ll
│           │   ├── stateful_neg_offset.ll
│           │   ├── sub.ll
│           │   ├── sub_extended.ll
│           │   ├── subc_cc_s32.ll
│           │   ├── tanh.ll
│           │   ├── tid.ll
│           │   ├── trap.ll
│           │   ├── uint_to_fp_bf16.ll
│           │   ├── vector.ll
│           │   ├── vector4.ll
│           │   ├── vector8.ll
│           │   ├── vector8_extract.ll
│           │   ├── vector_extract.ll
│           │   ├── vector_operand.ll
│           │   ├── vote_all.ll
│           │   ├── vote_all_sub.ll
│           │   ├── vote_any.ll
│           │   ├── vote_ballot.ll
│           │   ├── warp_sz.ll
│           │   └── xor.ll
│           ├── mod.rs
│           ├── operands.ptx
│           ├── spirv_build/
│           │   ├── bar_sync.ptx
│           │   ├── global_extern_array.ptx
│           │   └── param_func_array_0.ptx
│           ├── spirv_fail/
│           │   ├── const_ptr.ptx
│           │   ├── global_ptr.ptx
│           │   ├── local_ptr.txt
│           │   ├── param_entry_array_0.ptx
│           │   ├── param_vector.ptx
│           │   ├── shared_ptr.ptx
│           │   └── shared_ptr2.ptx
│           ├── spirv_run/
│           │   ├── abs.ptx
│           │   ├── activemask.ptx
│           │   ├── add.ptx
│           │   ├── add_extended.ptx
│           │   ├── add_ftz.ptx
│           │   ├── add_non_coherent.ptx
│           │   ├── add_s32_sat.ptx
│           │   ├── add_tuning.ptx
│           │   ├── addc_cc_s32.ptx
│           │   ├── and.ptx
│           │   ├── assertfail.ptx
│           │   ├── atom_add.ptx
│           │   ├── atom_add_float.ptx
│           │   ├── atom_cas.ptx
│           │   ├── atom_inc.ptx
│           │   ├── atomics_128.ptx
│           │   ├── b64tof64.ptx
│           │   ├── bar_red_and_pred.ptx
│           │   ├── bfe.ptx
│           │   ├── bfi.ptx
│           │   ├── block.ptx
│           │   ├── bmsk_clamp_b32.ptx
│           │   ├── bra.ptx
│           │   ├── brev.ptx
│           │   ├── call.ptx
│           │   ├── call_rnd.ptx
│           │   ├── clz.ptx
│           │   ├── const.ptx
│           │   ├── const_ident.ptx
│           │   ├── constant_f32.ptx
│           │   ├── constant_negative.ptx
│           │   ├── copysign.ptx
│           │   ├── cos.ptx
│           │   ├── cp_async.ptx
│           │   ├── createpolicy.ptx
│           │   ├── cvt_f16x2_f32.ptx
│           │   ├── cvt_f64_f32.ptx
│           │   ├── cvt_pack.ptx
│           │   ├── cvt_relu_f16x2_f32.ptx
│           │   ├── cvt_rn_bf16x2_f32.ptx
│           │   ├── cvt_rn_f16x2_e4m3x2.ptx
│           │   ├── cvt_rn_f16x2_e5m2x2.ptx
│           │   ├── cvt_rn_satfinite_e4m3x2_f32.ptx
│           │   ├── cvt_rn_satfinite_e5m2x2_f32.ptx
│           │   ├── cvt_rni.ptx
│           │   ├── cvt_rni_u16_f32.ptx
│           │   ├── cvt_rzi.ptx
│           │   ├── cvt_s16_s8.ptx
│           │   ├── cvt_s32_f32.ptx
│           │   ├── cvt_s64_s32.ptx
│           │   ├── cvt_sat_s_u.ptx
│           │   ├── cvta.ptx
│           │   ├── div_approx.ptx
│           │   ├── div_ftz.ptx
│           │   ├── div_noftz.ptx
│           │   ├── dp2a.ptx
│           │   ├── dp4a.ptx
│           │   ├── ex2.ptx
│           │   ├── extern_func.ptx
│           │   ├── extern_shared.ptx
│           │   ├── extern_shared_call.ptx
│           │   ├── fma.ptx
│           │   ├── fma_bf16x2.ptx
│           │   ├── fma_f16x2.ptx
│           │   ├── fmax.ptx
│           │   ├── func_ptr.ptx
│           │   ├── global_array.ptx
│           │   ├── global_array_f32.ptx
│           │   ├── implicit_param.ptx
│           │   ├── lanemask_lt.ptx
│           │   ├── ld_st.ptx
│           │   ├── ld_st_implicit.ptx
│           │   ├── ld_st_offset.ptx
│           │   ├── ldmatrix.ptx
│           │   ├── ldmatrix_trans.ptx
│           │   ├── lg2.ptx
│           │   ├── local_align.ptx
│           │   ├── mad_extended.ptx
│           │   ├── mad_s32.ptx
│           │   ├── mad_wide.ptx
│           │   ├── malformed_label.ptx
│           │   ├── max.ptx
│           │   ├── membar.ptx
│           │   ├── min.ptx
│           │   ├── min_f16.ptx
│           │   ├── min_nan_f16.ptx
│           │   ├── mma_m16n8k16_f32_bf16_bf16_f32.ptx
│           │   ├── mma_m16n8k16_f32_bf16_bf16_f32_2x.ptx
│           │   ├── mma_m16n8k16_f32_f16_f16_f32.ptx
│           │   ├── mma_m16n8k32_s32_s8_s8_s32.ptx
│           │   ├── mma_m16n8k32_s32_s8_s8_s32_interleave.ptx
│           │   ├── mod.rs
│           │   ├── mov.ptx
│           │   ├── mov_address.ptx
│           │   ├── mul24_hi_s32.ptx
│           │   ├── mul24_hi_u32.ptx
│           │   ├── mul24_lo_s32.ptx
│           │   ├── mul24_lo_u32.ptx
│           │   ├── mul_ftz.ptx
│           │   ├── mul_hi.ptx
│           │   ├── mul_lo.ptx
│           │   ├── mul_non_ftz.ptx
│           │   ├── mul_wide.ptx
│           │   ├── multiple_return.ptx
│           │   ├── nanosleep.ptx
│           │   ├── neg.ptx
│           │   ├── non_scalar_ptr_offset.ptx
│           │   ├── noreturn.ptx
│           │   ├── not.ptx
│           │   ├── ntid.ptx
│           │   ├── or.ptx
│           │   ├── param_is_addressable.ptx
│           │   ├── popc.ptx
│           │   ├── pred_not.ptx
│           │   ├── prmt.ptx
│           │   ├── prmt_slow.ptx
│           │   ├── rcp.ptx
│           │   ├── redux_sync_add_u32_partial.ptx
│           │   ├── redux_sync_op_s32.ptx
│           │   ├── redux_sync_op_u32.ptx
│           │   ├── reg_local.ptx
│           │   ├── reg_multi.ptx
│           │   ├── rem.ptx
│           │   ├── rsqrt.ptx
│           │   ├── sad_s64.ptx
│           │   ├── selp.ptx
│           │   ├── selp_true.ptx
│           │   ├── set_f16.ptx
│           │   ├── setp.ptx
│           │   ├── setp_gt.ptx
│           │   ├── setp_leu.ptx
│           │   ├── setp_nan.ptx
│           │   ├── setp_num.ptx
│           │   ├── shared_ptr_32.ptx
│           │   ├── shared_ptr_take_address.ptx
│           │   ├── shared_unify_extern.ptx
│           │   ├── shared_unify_local.ptx
│           │   ├── shared_variable.ptx
│           │   ├── shf_l.ptx
│           │   ├── shf_l_clamp.ptx
│           │   ├── shf_l_wrap.ptx
│           │   ├── shf_r.ptx
│           │   ├── shf_r_clamp.ptx
│           │   ├── shf_r_wrap.ptx
│           │   ├── shfl_sync_bfly_b32_pred.ptx
│           │   ├── shfl_sync_down_b32_pred.ptx
│           │   ├── shfl_sync_idx_b32_pred.ptx
│           │   ├── shfl_sync_mode_b32.ptx
│           │   ├── shfl_sync_up_b32_pred.ptx
│           │   ├── shl.ptx
│           │   ├── shr.ptx
│           │   ├── shr_oob.ptx
│           │   ├── sign_extend.ptx
│           │   ├── sin.ptx
│           │   ├── sqrt.ptx
│           │   ├── sqrt_rn_ftz.ptx
│           │   ├── stateful_ld_st_ntid.ptx
│           │   ├── stateful_ld_st_ntid_chain.ptx
│           │   ├── stateful_ld_st_ntid_sub.ptx
│           │   ├── stateful_ld_st_simple.ptx
│           │   ├── stateful_neg_offset.ptx
│           │   ├── sub.ptx
│           │   ├── sub_extended.ptx
│           │   ├── subc_cc_s32.ptx
│           │   ├── tanh.ptx
│           │   ├── tid.ptx
│           │   ├── trap.ptx
│           │   ├── uint_to_fp_bf16.ptx
│           │   ├── vector.ptx
│           │   ├── vector4.ptx
│           │   ├── vector8.ptx
│           │   ├── vector8_extract.ptx
│           │   ├── vector_extract.ptx
│           │   ├── vector_operand.ptx
│           │   ├── verify.py
│           │   ├── vote_all.ptx
│           │   ├── vote_all_sub.ptx
│           │   ├── vote_any.ptx
│           │   ├── vote_ballot.ptx
│           │   ├── warp_sz.ptx
│           │   └── xor.ptx
│           ├── vectorAdd_11.ptx
│           └── vectorAdd_kernel64.ptx
├── ptx_parser/
│   ├── Cargo.toml
│   └── src/
│       ├── ast.rs
│       ├── check_args.py
│       └── lib.rs
├── ptx_parser_macros/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
├── ptx_parser_macros_impl/
│   ├── Cargo.toml
│   └── src/
│       ├── lib.rs
│       └── parser.rs
├── ptxas/
│   ├── Cargo.toml
│   └── src/
│       └── main.rs
├── xtask/
│   ├── Cargo.toml
│   └── src/
│       └── main.rs
├── zluda/
│   ├── Cargo.toml
│   ├── build.rs
│   └── src/
│       ├── impl/
│       │   ├── context.rs
│       │   ├── device.rs
│       │   ├── driver.rs
│       │   ├── event.rs
│       │   ├── function.rs
│       │   ├── graph.rs
│       │   ├── hipfix.rs
│       │   ├── kernel.rs
│       │   ├── library.rs
│       │   ├── memory.rs
│       │   ├── mod.rs
│       │   ├── module.rs
│       │   ├── os_unix.rs
│       │   ├── os_win.rs
│       │   ├── pointer.rs
│       │   └── stream.rs
│       ├── lib.rs
│       ├── os_unix.rs
│       ├── os_win.rs
│       └── tests.rs
├── zluda_bindgen/
│   ├── Cargo.toml
│   ├── build/
│   │   ├── cublasLt_internal.h
│   │   ├── cublas_wrapper.h
│   │   ├── cuda_wrapper.h
│   │   ├── cudnn_v8/
│   │   │   ├── cudnn_adv_infer.h
│   │   │   ├── cudnn_adv_train.h
│   │   │   ├── cudnn_backend.h
│   │   │   ├── cudnn_cnn_infer.h
│   │   │   ├── cudnn_cnn_train.h
│   │   │   ├── cudnn_ops_infer.h
│   │   │   ├── cudnn_ops_train.h
│   │   │   └── cudnn_version.h
│   │   ├── cufft_wraper.h
│   │   └── decompile_cublaslt_internal.py
│   └── src/
│       ├── main.rs
│       └── process_table.rs
├── zluda_blas/
│   ├── Cargo.toml
│   ├── build.rs
│   └── src/
│       ├── impl.rs
│       ├── lib.rs
│       └── tests.rs
├── zluda_blaslt/
│   ├── Cargo.toml
│   ├── build.rs
│   └── src/
│       ├── impl.rs
│       └── lib.rs
├── zluda_cache/
│   ├── Cargo.toml
│   ├── diesel.toml
│   ├── migrations/
│   │   ├── .keep
│   │   └── 2025-08-04-203347_create_initial/
│   │       ├── down.sql
│   │       └── up.sql
│   └── src/
│       ├── lib.rs
│       ├── models.rs
│       └── schema.rs
├── zluda_common/
│   ├── Cargo.toml
│   └── src/
│       ├── constants.rs
│       ├── lib.rs
│       ├── os_unix.rs
│       └── os_win.rs
├── zluda_dnn/
│   ├── Cargo.toml
│   └── src/
│       ├── impl.rs
│       └── lib.rs
├── zluda_dnn8/
│   ├── Cargo.toml
│   ├── build.rs
│   └── src/
│       └── lib.rs
├── zluda_dnn9/
│   ├── Cargo.toml
│   ├── build.rs
│   └── src/
│       ├── lib.rs
│       └── tests.rs
├── zluda_fft/
│   ├── Cargo.toml
│   └── src/
│       ├── impl.rs
│       └── lib.rs
├── zluda_inject/
│   ├── Cargo.toml
│   ├── build.rs
│   ├── src/
│   │   ├── args.rs
│   │   ├── bin.rs
│   │   ├── main.rs
│   │   └── win.rs
│   └── tests/
│       ├── helpers/
│       │   ├── direct_cuinit.rs
│       │   ├── do_cuinit.rs
│       │   ├── do_cuinit_early.rs
│       │   ├── do_cuinit_late.rs
│       │   ├── do_cuinit_late_clr.cs
│       │   ├── indirect_cuinit.rs
│       │   └── subprocess.rs
│       └── inject.rs
├── zluda_ld/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
├── zluda_ml/
│   ├── Cargo.toml
│   └── src/
│       ├── impl_common.rs
│       ├── impl_unix.rs
│       ├── impl_win.rs
│       └── lib.rs
├── zluda_precompile/
│   ├── Cargo.toml
│   └── src/
│       └── main.rs
├── zluda_redirect/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
├── zluda_sparse/
│   ├── Cargo.toml
│   ├── build.rs
│   └── src/
│       ├── impl.rs
│       └── lib.rs
├── zluda_trace/
│   ├── Cargo.toml
│   └── src/
│       ├── dark_api.rs
│       ├── lib.rs
│       ├── log.rs
│       ├── os_unix.rs
│       ├── os_win.rs
│       └── trace.rs
├── zluda_trace_blas/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
├── zluda_trace_blaslt/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
├── zluda_trace_common/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
├── zluda_trace_dnn8/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
├── zluda_trace_dnn9/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
├── zluda_trace_fft/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
├── zluda_trace_nvml/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
├── zluda_trace_sparse/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
└── zluda_windows/
    ├── Cargo.toml
    ├── library.manifest
    ├── manifest.rc
    └── src/
        └── lib.rs

================================================
FILE CONTENTS
================================================

================================================
FILE: .cargo/config.toml
================================================
[alias]
xtask = "run --package xtask --"

[target.x86_64-pc-windows-msvc]
rustflags = ["-Ctarget-feature=+crt-static"]


================================================
FILE: .devcontainer/Dockerfile
================================================
FROM nvidia/cuda:13.0.1-base-ubuntu24.04

RUN DEBIAN_FRONTEND=noninteractive apt update -y && DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends \
    wget \
    build-essential \
    cmake \
    ninja-build \
    python3 \
    ripgrep \
    git \
    ltrace \
    # required by llvm 17
    lsb-release software-properties-common gnupg

ARG LLVM_VERSION=17
RUN wget https://apt.llvm.org/llvm.sh && \
    chmod +x llvm.sh && \
    ./llvm.sh ${LLVM_VERSION}

# Feel free to change to a newer version if you have a newer version on your host
ARG CUDA_PKG_VERSION=13-0
# Docker <-> host driver version compatibility: newer host driver <-> older Docker image driver
# Driver 580+ is required for CUDA 13
ARG CUDA_DRIVER=580
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/libcudnn8-dev_8.9.7.29-1+cuda12.2_amd64.deb && \
    dpkg-deb -R libcudnn8-dev_8.9.7.29-1+cuda12.2_amd64.deb /opt && \
    rm libcudnn8-dev_8.9.7.29-1+cuda12.2_amd64.deb
RUN DEBIAN_FRONTEND=noninteractive apt update -y && DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends \
    # CUDA headers need it for interop
    libgl-dev libegl-dev libvdpau-dev \
    nvidia-headless-no-dkms-${CUDA_DRIVER}-open \
    cuda-cudart-dev-${CUDA_PKG_VERSION} \
    cuda-nvml-dev-${CUDA_PKG_VERSION} \
    cuda-cudart-${CUDA_PKG_VERSION} \
    cuda-profiler-api-${CUDA_PKG_VERSION} \
    cuda-nvcc-${CUDA_PKG_VERSION} \
    cudnn9-cuda-${CUDA_PKG_VERSION} \
    libcufft-dev-${CUDA_PKG_VERSION} \
    libcublas-dev-${CUDA_PKG_VERSION} \
    libcusparse-dev-${CUDA_PKG_VERSION}

ARG ROCM_VERSION=6.4.4
RUN mkdir --parents --mode=0755 /etc/apt/keyrings && \
    wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
    gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null && \
    echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${ROCM_VERSION} noble main" > /etc/apt/sources.list.d/rocm.list && \
    echo 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' > /etc/apt/preferences.d/rocm-pin-600 && \
    DEBIAN_FRONTEND=noninteractive apt update -y && DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends \
    rocminfo \
    rocm-gdb \
    rocm-smi-lib amd-smi-lib \
    rocm-llvm-dev \
    hip-runtime-amd \
    miopen-hip-dev \
    rocfft-dev \
    rocblas-dev \
    hipblaslt-dev \
    rocsolver-dev \
    rocsparse-dev \
    hip-dev && \
    echo '/opt/rocm/lib' > /etc/ld.so.conf.d/rocm.conf && \
    ldconfig

ENV PATH=$PATH:/opt/rocm-${ROCM_VERSION}/bin



================================================
FILE: .devcontainer/devcontainer.json
================================================
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/rust
{
	"name": "zluda",
	// The image is built from the Dockerfile that sits next to this file.
	"build": {
		"dockerfile": "Dockerfile"
	},
	// Run without the default seccomp profile.
	// NOTE(review): presumably required for debugging (ptrace) inside the container - confirm.
	"securityOpt": [ "seccomp=unconfined" ],
	// Expose the host's /dev/kfd and /dev/dri device nodes to the container and
	// add the container user to the `video` group. The NVIDIA runtime is left
	// disabled (commented out).
	"runArgs": [
		//"--runtime=nvidia",
		"--device=/dev/kfd",
		"--device=/dev/dri",
		"--group-add=video"
	],
	// Bind-mount the host user's ~/.cargo directory onto /root/.cargo in the container.
	"mounts": [
		{
			"source": "${localEnv:HOME}/.cargo/",
			"target": "/root/.cargo",
			"type": "bind"
		}
	],
	// https://containers.dev/features.
	"features": {
		"ghcr.io/devcontainers/features/rust:1": {}
	},
	// https://aka.ms/dev-containers-non-root.
	"remoteUser": "root",
	"hostRequirements": { "gpu": true },
	"customizations": {
		"vscode": {
			"extensions": [ "mhutchie.git-graph" ]
		}
	},
	// NOTE(review): presumably tells the NVIDIA container tooling to skip its
	// CUDA/driver requirement check - confirm.
	"containerEnv": {
		"NVIDIA_DISABLE_REQUIRE": "1"
	}
}


================================================
FILE: .git-blame-ignore-revs
================================================
21ef5f60a3a5efa17855a30f6b5c7d1968cd46ba


================================================
FILE: .gitattributes
================================================
ext/** linguist-vendored
*.dll filter=lfs diff=lfs merge=lfs -text
*.bc filter=lfs diff=lfs merge=lfs -text


================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: true


================================================
FILE: .github/ISSUE_TEMPLATE/zluda_dump.yml
================================================
name: Bug Report
description: Report an issue with ZLUDA
body:
  - type: markdown
    attributes:
      value: |
        If you are reporting an application that is not supported by ZLUDA, please use zluda_trace to
        create logs. See instructions here: https://zluda.readthedocs.io/latest/troubleshooting.html
  - type: textarea
    id: logs
    attributes:
      label: zluda_trace logs (tarball/zip file)
      description: |
        Please create a tarball (`.tar.gz`) or zip file (`.zip`) of your log directory and attach
        it here. You can drag and drop files directly into the comment box. Please also include
        zluda_trace logs using CUDA if you have NVIDIA hardware to test on.
      placeholder: Attach file (e.g., drag and drop)
    validations:
      required: false
  - type: textarea
    id: description
    attributes:
      label: Description
      description: |
        Describe the issue you've encountered. What is the expected behavior? What is the actual
        behavior?
      placeholder: Description
    validations:
      required: false
  - type: textarea
    id: reproduce
    attributes:
      label: Steps to reproduce
      description: |
        Please describe the application you were running and provide clear, step-by-step
        instructions to run it.
      placeholder: |
        example:
          1. Download llm.c: git clone https://github.com/karpathy/llm.c.git
          2. Navigate to the directory: cd llm.c
          3. Download the model and train it:
            chmod u+x ./dev/download_starter_pack.sh
            ./dev/download_starter_pack.sh
            make train_gpt2fp32cu
            ./train_gpt2fp32cu
          4. Build and run the tests:
            make test_gpt2fp32cu
            LD_LIBRARY_PATH=<ZLUDA_LOG_DIR> ./test_gpt2fp32cu
    validations:
      required: true
  - type: input
    id: version
    attributes:
      label: ZLUDA version
      description: What version of ZLUDA are you using? Due to legal issues **versions older than 4 are not supported**
      placeholder:  "example: 5-preview.113"
    validations:
      required: true
  - type: input
    id: os
    attributes:
      label: Operating System
      description: What operating system are you using? (e.g., distribution and version)
      placeholder:  "example: Ubuntu 22.04.5 LTS"
    validations:
      required: true
  - type: input
    id: gpu
    attributes:
      label: GPU
      description: What GPU are you using?
      placeholder:  "example: AMD Radeon RX 6600"
    validations:
      required: true


================================================
FILE: .github/workflows/move_tests.sh
================================================
#!/bin/bash
# Moves every test executable that lists at least one test whose name ends in
# "_<SUFFIX>" from <TEST_EXECUTABLES_DIR> into "<TEST_EXECUTABLES_DIR>/../<SUFFIX>/".
#
# Usage: move_tests.sh <TEST_EXECUTABLES_DIR> <SUFFIX>
set -ex
TEST_EXECUTABLES_DIR=${1:?usage: move_tests.sh <TEST_EXECUTABLES_DIR> <SUFFIX>}
SUFFIX=${2:?usage: move_tests.sh <TEST_EXECUTABLES_DIR> <SUFFIX>}

# Glob instead of parsing `ls` output: the expansion is already sorted and is
# safe for paths that contain whitespace.
for executable in "${TEST_EXECUTABLES_DIR}"/*; do
    # Skip directories, non-executable files and the unexpanded pattern that is
    # left behind when the directory is empty.
    [ -f "$executable" ] && [ -x "$executable" ] || continue
    # The listing runs inside the `if` condition on purpose: under `set -e` a
    # plain `output=$(...)` assignment would abort the script as soon as one
    # executable fails `--list`, before its exit status could be inspected.
    if output=$("$executable" --list 2>/dev/null) \
        && grep -q "_${SUFFIX}: test\$" <<< "$output"; then
        mv "$executable" "${TEST_EXECUTABLES_DIR}/../${SUFFIX}/"
    fi
done


================================================
FILE: .github/workflows/nightly_tests.yml
================================================
# Runs the vosen/ptx_tests suite against the latest ZLUDA pre-release build,
# split into TEST_THREADS shards that run in parallel on a GPU runner.
name: Nightly tests
on:
  workflow_call:
  workflow_dispatch:

env:
  ROCM_VERSION: "6.3.4"
  AMDGPU_VERSION: "6.4.4"
  # Number of parallel shards started by the "Build and run" step.
  TEST_THREADS: 24

jobs:
  run_tests:
    runs-on: gpu_large
    steps:
    # Only the ROCm setup script is needed from the ZLUDA repository.
    - uses: actions/checkout@v4
      with:
        repository: 'vosen/ZLUDA'
        path: zluda-src
        sparse-checkout: |
          .github/workflows/rocm_setup_run.sh
    - name: Install ROCm
      run: sudo bash zluda-src/.github/workflows/rocm_setup_run.sh ${{ env.ROCM_VERSION }} ${{ env.AMDGPU_VERSION }}
    - uses: actions/checkout@v4
      with:
        repository: 'vosen/ptx_tests'
    # Download and unpack the most recent ZLUDA (pre-)release for Linux.
    - uses: robinraju/release-downloader@v1
      with:
        repository: 'vosen/ZLUDA'
        latest: true
        preRelease: true
        extract: true
        fileName: 'zluda-linux-*.tar.gz'
    - name: Build and run
      run: |
        # The variable must be set on the apt side of sudo: a plain
        # `VAR=x sudo apt ...` only sets it for sudo, which resets the
        # environment before running apt.
        sudo env DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends curl
        curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --default-toolchain nightly --profile minimal -y
        source ~/.cargo/env
        pids=()
        for i in $(seq 0 $((${{ env.TEST_THREADS }} - 1))); do
          cargo run -r -- zluda/libcuda.so.1 --shard-index $i --shard-count ${{ env.TEST_THREADS }} > output_$i.log 2>&1 &
          pids+=($!)
        done
        # This step runs under `bash -e`, so a bare `wait` on a failed shard
        # would abort the script immediately and leave the remaining shards
        # unwaited (and their logs incomplete). Record the failure and keep
        # waiting for every shard instead.
        error_occurred=0
        for pid in "${pids[@]}"; do
          wait "$pid" || error_occurred=1
        done
        exit $error_occurred
    - name: Upload logs
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: output_logs
        path: output_*.log

================================================
FILE: .github/workflows/pr_master.yml
================================================
name: ZLUDA
on:
  pull_request:
    branches: [ master ]

env:
  CARGO_TERM_COLOR: always
  CARGO_PROFILE: release
  SCCACHE_GHA_ENABLED: "true"
  RUSTC_WRAPPER: "sccache"
  SCCACHE_MAX_FRAME_LENGTH: "104857600"  # 100 MB
  ROCM_VERSION: "6.3.4"
  AMDGPU_VERSION: "6.4.4"

jobs:
  # Fails when a file has trailing whitespace or lacks a final newline: every
  # file is diffed against a copy with trailing whitespace stripped and a
  # terminating newline appended.
  check_whitespace:
    name: Check Whitespace
    runs-on: ubuntu-22.04
    steps:
      # Without a checkout the workspace is empty, fdfind finds no files and
      # the check passes without inspecting anything.
      - uses: actions/checkout@v4
      - run: |
          sudo apt install -y fd-find
          fdfind \
            --exclude '*.bc' \
            --exclude '*.exe' \
            --exclude '*.lib' \
            --exclude ext/detours \
            --strip-cwd-prefix \
            --type file \
            --exec bash -c '
              diff \
                --unified \
                --label "a/$0" \
                --label "b/$0" \
                <(cat "$0") \
                <(sed --regexp-extended "s/\s+$//; \$a\\" "$0")
            '
  # Checks the formatting of the Rust sources with rustfmt.
  formatting:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      # Install the Rust toolchain together with the rustfmt component.
      - uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          components: rustfmt
      - name: Check Rust formatting
        uses: actions-rust-lang/rustfmt@v1
  # Builds the Linux package with `cargo xtask zip` and uploads its unpacked
  # contents as a workflow artifact named after the short commit hash.
  build_linux:
    name: Build (Linux)
    runs-on: ubuntu-22.04
    steps:
    # NOTE(review): presumably frees runner disk space needed by the build - confirm.
    - uses: jlumbroso/free-disk-space@v1.3.1
    # LFS objects and submodules are fetched along with the sources.
    - uses: actions/checkout@v4
      with:
        lfs: true
        submodules: true
    - name: Install ROCm
      run: sudo bash .github/workflows/rocm_setup_build.sh ${{ env.ROCM_VERSION }}
    - name: Run sccache-cache
      uses: mozilla-actions/sccache-action@v0.0.9
    - name: Build
      # The tarball is unpacked again so that the upload step receives a plain
      # directory; see the linked issue for the background:
      # https://github.com/actions/upload-artifact/issues/39
      run: |
        cargo xtask zip --profile ${{ env.CARGO_PROFILE }}
        mkdir target/${{ env.CARGO_PROFILE }}/zluda
        tar -xzf target/${{ env.CARGO_PROFILE }}/zluda.tar.gz -C target/${{ env.CARGO_PROFILE }}/zluda
    # Exports SHORT_SHA to the following steps through GITHUB_ENV.
    - name: Set revision hash
      run: echo "SHORT_SHA=$(git rev-parse --short HEAD)" >> $GITHUB_ENV
    - name: Upload
      uses: actions/upload-artifact@v4
      with:
        name: zluda-linux-${{ env.SHORT_SHA }}
        path: target/${{ env.CARGO_PROFILE }}/zluda
  # Builds the Windows package with `cargo xtask zip` and uploads its unpacked
  # contents as a workflow artifact named after the short commit hash.
  build_windows:
    name: Build (Windows)
    runs-on: windows-2022
    steps:
    # LFS objects and submodules are fetched along with the sources.
    - uses: actions/checkout@v4
      with:
        lfs: true
        submodules: true
    - name: Run sccache-cache
      uses: mozilla-actions/sccache-action@v0.0.9
    # The zip is expanded again so that the upload step receives a plain directory.
    - name: Build
      run: |
        cargo xtask zip --profile ${{ env.CARGO_PROFILE }}
        Expand-Archive -Path target/${{ env.CARGO_PROFILE }}/zluda.zip -DestinationPath target/${{ env.CARGO_PROFILE }}/zluda
    # Exports the first 7 characters of the commit SHA as SHORT_SHA through GITHUB_ENV.
    - name: Set revision hash
      run: echo "SHORT_SHA=$("${{ github.sha }}".SubString(0, 7))" >> $env:GITHUB_ENV
    - name: Upload
      uses: actions/upload-artifact@v4
      with:
        name: zluda-windows-${{ env.SHORT_SHA }}
        path: target/${{ env.CARGO_PROFILE }}/zluda
  # Builds the workspace test executables, keeps those that contain tests with
  # the `_amdgpu` suffix and uploads them for the run_tests job.
  build_tests:
    name: Build AMD GPU unit tests
    runs-on: gpu_small
    # The id of the uploaded artifact is handed to run_tests through this output.
    outputs:
      test_package: ${{ steps.upload_artifacts.outputs.artifact-id }}
    steps:
    - uses: jlumbroso/free-disk-space@v1.3.1
    - name: Install build tools
      run: |
        sudo apt update
        sudo apt install -y git git-lfs build-essential cmake
    - uses: actions/checkout@v4
      with:
        lfs: true
        submodules: true
    - uses: actions-rust-lang/setup-rust-toolchain@v1
      with:
        rustflags: ""
    - name: Install ROCm
      run: sudo bash .github/workflows/rocm_setup_build.sh ${{ env.ROCM_VERSION }}
    - name: Run sccache-cache
      uses: mozilla-actions/sccache-action@v0.0.9
    - uses: taiki-e/install-action@v2
      with:
        tool: cargo-export
    # Test executables are exported to target/tests; move_tests.sh then moves
    # the ones listing `_amdgpu` tests to target/amdgpu, where they are stripped.
    - name: Build
      run: |
        cargo export target/tests -- test --features ci_build --workspace \
          --exclude cuda_macros \
          --exclude ptx_parser_macros \
          --exclude zluda_inject \
          --exclude zluda_redirect
        mkdir -p target/amdgpu
        bash .github/workflows/move_tests.sh target/tests amdgpu
        strip target/amdgpu/*
    - name: Upload
      id: upload_artifacts
      uses: actions/upload-artifact@v4
      with:
        name: tests
        path: target/amdgpu
        retention-days: 7
  # Downloads the test executables produced by build_tests and runs their
  # `_amdgpu` tests on a GPU runner.
  run_tests:
    name: Run AMD GPU unit tests
    runs-on: gpu_small
    needs: [build_tests]
    steps:
    # Only the .github directory (setup scripts) is needed from the repository.
    - uses: actions/checkout@v4
      with:
        submodules: false
        sparse-checkout: .github
    - name: Install ROCm
      run: sudo bash .github/workflows/rocm_setup_run.sh ${{ env.ROCM_VERSION }} ${{ env.AMDGPU_VERSION }}
    - uses: actions/download-artifact@v4
      with:
        artifact-ids: ${{ needs.build_tests.outputs.test_package }}
        path: target
    # Every executable is run even after a failure; the step fails at the end
    # if any of them returned a non-zero exit code.
    - name: Run tests
      run: |
        chmod +x target/tests/*
        error_occurred=0
        for exe in target/tests/*; do
            ./"$exe" _amdgpu || { error_occurred=1; true; }
        done
        exit $error_occurred


================================================
FILE: .github/workflows/push_master.yml
================================================
name: ZLUDA
on:
  workflow_dispatch:
  push:
    branches: [ master ]

env:
  CARGO_TERM_COLOR: always
  CARGO_PROFILE: release-lto
  SCCACHE_GHA_ENABLED: "true"
  RUSTC_WRAPPER: "sccache"
  SCCACHE_MAX_FRAME_LENGTH: "104857600"  # 100 MB
  ROCM_VERSION: "6.3.4"
  AMDGPU_VERSION: "6.4.4"

jobs:
  # Builds the Linux package, uploads it as a workflow artifact and publishes
  # it as a pre-release.
  build_linux:
    name: Build (Linux)
    runs-on: ubuntu-22.04
    # Write access to repository contents is requested for the release step.
    permissions:
      contents: write
    steps:
    - uses: jlumbroso/free-disk-space@v1.3.1
    - uses: actions/checkout@v4
      # fetch-depth and fetch-tags are required to properly tag pre-release builds
      with:
        fetch-depth: 0
        fetch-tags: true
        lfs: true
        submodules: true
    - name: Install ROCm
      run: sudo bash .github/workflows/rocm_setup_build.sh ${{ env.ROCM_VERSION }}
    - name: Run sccache-cache
      uses: mozilla-actions/sccache-action@v0.0.9
    - name: Build
      # The tarball is unpacked again so that the upload step receives a plain
      # directory; see the linked issue for the background:
      # https://github.com/actions/upload-artifact/issues/39
      run: |
        cargo xtask zip --profile ${{ env.CARGO_PROFILE }}
        mkdir target/${{ env.CARGO_PROFILE }}/zluda
        tar -xzf target/${{ env.CARGO_PROFILE }}/zluda.tar.gz -C target/${{ env.CARGO_PROFILE }}/zluda
    # Exports SHORT_SHA to the following steps through GITHUB_ENV.
    - name: Set revision hash
      run: echo "SHORT_SHA=$(git rev-parse --short HEAD)" >> $GITHUB_ENV
    - name: Upload
      uses: actions/upload-artifact@v4
      with:
        name: zluda-linux-${{ env.SHORT_SHA }}
        path: target/${{ env.CARGO_PROFILE }}/zluda
    # Renames the tarball after the commit and derives the pre-release version
    # "<N+1>-preview.<offset>", where vN is the highest tag of the form
    # "v<number>" and <offset> is the number of commits between that tag and HEAD.
    - name: Prepare artifact for release
      run: |
        mv target/${{ env.CARGO_PROFILE }}/zluda.tar.gz target/${{ env.CARGO_PROFILE }}/zluda-linux-${{ env.SHORT_SHA }}.tar.gz
        latest_tag=$(git tag -l "v*" | grep -E "^v[0-9]+$" | sort -V | tail -n 1)
        next_version="$((${latest_tag:1} + 1))"
        offset=$(git rev-list $latest_tag..HEAD --count)
        echo "VERSION=$next_version-preview.$offset" >> $GITHUB_OUTPUT
      id: prepare_artifacts
    # allowUpdates lets this step add its file to a release that already exists for the tag.
    - uses: ncipollo/release-action@v1
      with:
        prerelease: true
        generateReleaseNotes: true
        allowUpdates: true
        omitNameDuringUpdate: true
        artifacts: "target/${{ env.CARGO_PROFILE }}/zluda-linux-${{ env.SHORT_SHA }}.tar.gz"
        name: "Version ${{ steps.prepare_artifacts.outputs.VERSION }}"
        tag: "v${{ steps.prepare_artifacts.outputs.VERSION }}"
  # Builds the Windows package, uploads it as a workflow artifact and publishes
  # it as a pre-release.
  build_windows:
    name: Build (Windows)
    runs-on: windows-2022
    # Write access to repository contents is requested for the release step.
    permissions:
      contents: write
    steps:
    - uses: actions/checkout@v4
      # fetch-depth and fetch-tags are required to properly tag pre-release builds
      with:
        fetch-depth: 0
        fetch-tags: true
        lfs: true
        submodules: true
    - name: Run sccache-cache
      uses: mozilla-actions/sccache-action@v0.0.9
    # The zip is expanded again so that the upload step receives a plain directory.
    - name: Build
      run: |
        cargo xtask zip --profile ${{ env.CARGO_PROFILE }}
        Expand-Archive -Path target/${{ env.CARGO_PROFILE }}/zluda.zip -DestinationPath target/${{ env.CARGO_PROFILE }}/zluda
    # Exports the first 7 characters of the commit SHA as SHORT_SHA through GITHUB_ENV.
    - name: Set revision hash
      run: echo "SHORT_SHA=$("${{ github.sha }}".SubString(0, 7))" >> $env:GITHUB_ENV
    - name: Upload
      uses: actions/upload-artifact@v4
      with:
        name: zluda-windows-${{ env.SHORT_SHA }}
        path: target/${{ env.CARGO_PROFILE }}/zluda
    # Runs in bash (not the default Windows shell). Renames the zip after the
    # commit and derives the pre-release version "<N+1>-preview.<offset>",
    # where vN is the highest tag of the form "v<number>" and <offset> is the
    # number of commits between that tag and HEAD.
    - name: Prepare artifact for release
      shell: bash
      working-directory: ${{ github.workspace }}
      run: |
        mv target/${{ env.CARGO_PROFILE }}/zluda.zip target/${{ env.CARGO_PROFILE }}/zluda-windows-${{ env.SHORT_SHA }}.zip
        latest_tag=$(git tag -l "v*" | grep -E "^v[0-9]+$" | sort -V | tail -n 1)
        next_version="$((${latest_tag:1} + 1))"
        offset=$(git rev-list $latest_tag..HEAD --count)
        echo "VERSION=$next_version-preview.$offset" >> $GITHUB_OUTPUT
      id: prepare_artifacts
    # allowUpdates lets this step add its file to a release that already exists for the tag.
    - uses: ncipollo/release-action@v1
      with:
        prerelease: true
        generateReleaseNotes: true
        allowUpdates: true
        omitNameDuringUpdate: true
        artifacts: "target/${{ env.CARGO_PROFILE }}/zluda-windows-${{ env.SHORT_SHA }}.zip"
        name: "Version ${{ steps.prepare_artifacts.outputs.VERSION }}"
        tag: "v${{ steps.prepare_artifacts.outputs.VERSION }}"
  # Builds the workspace test executables, keeps those that contain tests with
  # the `_amdgpu` suffix and uploads them for the run_tests job.
  build_tests:
    name: Build AMD GPU unit tests
    runs-on: gpu_small
    # The id of the uploaded artifact is handed to run_tests through this output.
    outputs:
      test_package: ${{ steps.upload_artifacts.outputs.artifact-id }}
    steps:
    - uses: jlumbroso/free-disk-space@v1.3.1
    - name: Install build tools
      run: |
        sudo apt update
        sudo apt install -y git git-lfs build-essential cmake
    - uses: actions/checkout@v4
      with:
        lfs: true
        submodules: true
    - uses: actions-rust-lang/setup-rust-toolchain@v1
      with:
        rustflags: ""
    - name: Install ROCm
      # rocm_setup_build.sh takes the ROCm version as its only argument; the
      # AMDGPU driver version is only consumed by rocm_setup_run.sh.
      run: sudo bash .github/workflows/rocm_setup_build.sh ${{ env.ROCM_VERSION }}
    - name: Run sccache-cache
      uses: mozilla-actions/sccache-action@v0.0.9
    - uses: taiki-e/install-action@v2
      with:
        tool: cargo-export
    # Test executables are exported to target/tests; move_tests.sh then moves
    # the ones listing `_amdgpu` tests to target/amdgpu, where they are stripped.
    - name: Build
      run: |
        cargo export target/tests -- test --features ci_build --workspace \
          --exclude cuda_macros \
          --exclude ptx_parser_macros \
          --exclude zluda_inject \
          --exclude zluda_redirect
        mkdir -p target/amdgpu
        bash .github/workflows/move_tests.sh target/tests amdgpu
        strip target/amdgpu/*
    - name: Upload
      id: upload_artifacts
      uses: actions/upload-artifact@v4
      with:
        name: tests
        path: target/amdgpu
        retention-days: 7
  # Downloads the test executables produced by build_tests and runs their
  # `_amdgpu` tests on a GPU runner.
  run_tests:
    name: Run AMD GPU unit tests
    runs-on: gpu_small
    needs: [build_tests]
    steps:
    # Only the .github directory (setup scripts) is needed from the repository.
    - uses: actions/checkout@v4
      with:
        submodules: false
        sparse-checkout: .github
    - name: Install ROCm
      run: sudo bash .github/workflows/rocm_setup_run.sh ${{ env.ROCM_VERSION }} ${{ env.AMDGPU_VERSION }}
    - uses: actions/download-artifact@v4
      with:
        artifact-ids: ${{ needs.build_tests.outputs.test_package }}
        path: target
    # Every executable is run even after a failure; the step fails at the end
    # if any of them returned a non-zero exit code.
    - name: Run tests
      run: |
        chmod +x target/tests/*
        error_occurred=0
        for exe in target/tests/*; do
            ./"$exe" _amdgpu || { error_occurred=1; true; }
        done
        exit $error_occurred


================================================
FILE: .github/workflows/rocm_setup_build.sh
================================================
#!/bin/bash
# Registers the ROCm apt repository for Ubuntu 22.04 (jammy) and installs the
# ROCm packages needed to build ZLUDA. Writes to /etc, so it must run as root.
#
# Usage: rocm_setup_build.sh <ROCM_VERSION>
set -ex
# Without pipefail a failed key download would go unnoticed, because only the
# exit status of the final `tee` would be checked.
set -o pipefail
# Fail early with a usage message instead of writing a repository URL with an
# empty version component.
ROCM_VERSION=${1:?usage: rocm_setup_build.sh <ROCM_VERSION>}

# Refresh the package index first: installing from a stale index can fail with
# 404 errors on long-lived runner images.
DEBIAN_FRONTEND=noninteractive apt update -y
DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends gpg patchelf
# Source: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/install-methods/package-manager/package-manager-ubuntu.html
mkdir --parents --mode=0755 /etc/apt/keyrings
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
    gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null
# Quoted so that the bracketed option list can never be treated as a glob pattern.
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION jammy main" | tee /etc/apt/sources.list.d/rocm.list
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' \
    | tee /etc/apt/preferences.d/rocm-pin-600
DEBIAN_FRONTEND=noninteractive apt update -y
DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends rocm-smi-lib rocm-llvm-dev hip-runtime-amd hip-dev rocblas-dev hipblaslt-dev miopen-hip-dev rocsparse-dev
# Make the ROCm tools and libraries discoverable for later shells and the dynamic linker.
echo 'export PATH="$PATH:/opt/rocm/bin"' |  tee /etc/profile.d/rocm.sh
echo "/opt/rocm/lib" | tee /etc/ld.so.conf.d/rocm.conf
ldconfig


================================================
FILE: .github/workflows/rocm_setup_run.sh
================================================
#!/bin/bash
# Registers the ROCm and amdgpu apt repositories for Ubuntu 24.04 (noble),
# installs the kernel driver and HIP runtime needed to run tests on an AMD GPU,
# and opens the GPU device nodes to all users. Writes to /etc and loads a
# kernel module, so it must run as root.
#
# Usage: rocm_setup_run.sh <ROCM_VERSION> <AMDGPU_VERSION>
set -ex
# Without pipefail a failed key download would go unnoticed, because only the
# exit status of the final `tee` would be checked.
set -o pipefail
# Fail early with a usage message instead of writing repository URLs with an
# empty version component.
ROCM_VERSION=${1:?usage: rocm_setup_run.sh <ROCM_VERSION> <AMDGPU_VERSION>}
AMDGPU_VERSION=${2:?usage: rocm_setup_run.sh <ROCM_VERSION> <AMDGPU_VERSION>}

# Refresh the package index first: the headers/modules packages must match the
# running kernel exactly and may be missing from a stale index.
DEBIAN_FRONTEND=noninteractive apt update -y
DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends gpg zstd unzip "linux-headers-$(uname -r)" "linux-modules-extra-$(uname -r)"
# Source: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/install-methods/package-manager/package-manager-ubuntu.html
mkdir --parents --mode=0755 /etc/apt/keyrings
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
    gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null
# Quoted so that the bracketed option lists can never be treated as glob patterns.
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION noble main" | tee /etc/apt/sources.list.d/rocm.list
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu noble main" | tee /etc/apt/sources.list.d/amdgpu.list
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' \
    | tee /etc/apt/preferences.d/rocm-pin-600
DEBIAN_FRONTEND=noninteractive apt update -y
# rocm-smi-lib shouldn't be necessary, but somehow ptx tests started linking to it.
# Result of Rust 1.90 linker change?
DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends amdgpu-dkms hip-runtime-amd rocm-smi-lib
# Make the ROCm tools and libraries discoverable for later shells and the dynamic linker.
echo 'export PATH="$PATH:/opt/rocm/bin"' |  tee /etc/profile.d/rocm.sh
echo "/opt/rocm/lib" | tee /etc/ld.so.conf.d/rocm.conf
ldconfig

# Grant access to GPUs to all users via udev rules
cat <<'EOF' > /etc/udev/rules.d/70-amdgpu.rules
KERNEL=="kfd", MODE="0666"
SUBSYSTEM=="drm", KERNEL=="renderD*", MODE="0666"
EOF
udevadm control --reload-rules && udevadm trigger
modprobe amdgpu

================================================
FILE: .github/workflows/trigger_nightly_tests.yml
================================================
# Scheduled entry point for the nightly tests: starts nightly_tests.yml once a
# day unless the most recent completed nightly run already covered the current
# commit.
name: Trigger nightly tests
on:
  schedule:
    # Every day at 08:00 (GitHub evaluates cron schedules in UTC).
    - cron: "0 8 * * *"

jobs:
  # Looks up the head SHA of the latest completed run of nightly_tests.yml and
  # exposes it as the `last_sha` output.
  # NOTE(review): `workflow_runs[0]` assumes at least one completed run exists.
  # Also, runs started through `uses:` below are, as far as I know, recorded
  # under this caller workflow rather than under nightly_tests.yml - confirm
  # that this query actually sees them.
  check_last_nightly_run:
    runs-on: 'ubuntu-latest'
    outputs:
      last_sha: ${{ fromJson(steps.check_last_run.outputs.data).workflow_runs[0].head_sha }}
    steps:
    - uses: octokit/request-action@v2.4.0
      id: check_last_run
      with:
        route: GET /repos/${{github.repository}}/actions/workflows/nightly_tests.yml/runs?per_page=1&status=completed
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - run: "echo Last nightly build: ${{ fromJson(steps.check_last_run.outputs.data).workflow_runs[0].head_sha }}"
  # Runs the nightly tests only when the commit differs from the one tested last.
  build:
    needs: [check_last_nightly_run]
    if: needs.check_last_nightly_run.outputs.last_sha != github.sha
    uses: ./.github/workflows/nightly_tests.yml
    secrets: inherit

================================================
FILE: .gitignore
================================================
target/
Cargo.lock

.vscode/
.idea/

ptx/lib/zluda_ptx_impl.ll


================================================
FILE: .gitmodules
================================================
[submodule "ext/llvm-project"]
	path = ext/llvm-project
	url = https://github.com/vosen/llvm-project.git
	branch = main
	shallow = true
[submodule "ext/HiGHS"]
	path = ext/HiGHS
	url = https://github.com/ERGO-Code/HiGHS.git
	shallow = true


================================================
FILE: .rustfmt.toml
================================================
newline_style = "Unix"


================================================
FILE: Cargo.toml
================================================
[workspace]

resolver = "2"

members = [
    "cuda_check",
    "cuda_macros",
    "cuda_types",
    "dark_api",
    "detours-sys",
    "ext/highs-sys",
    "ext/hip_runtime-sys",
    "ext/hipblaslt-sys",
    "ext/miopen-sys",
    "ext/rocblas-sys",
    "format",
    "ptx",
    "ptx_parser",
    "ptx_parser_macros",
    "ptx_parser_macros_impl",
    "ptxas",
    "xtask",
    "zluda",
    "zluda_bindgen",
    "zluda_blas",
    "zluda_blaslt",
    "zluda_cache",
    "zluda_common",
    "zluda_dnn",
    "zluda_dnn8",
    "zluda_dnn9",
    "zluda_trace",
    "zluda_trace_blas",
    "zluda_trace_blaslt",
    "zluda_trace_common",
    "zluda_trace_dnn8",
    "zluda_trace_dnn9",
    "zluda_trace_fft",
    "zluda_trace_nvml",
    "zluda_trace_sparse",
    "zluda_fft",
    "zluda_inject",
    "zluda_ld",
    "zluda_ml",
    "zluda_precompile",
    "zluda_redirect",
    "zluda_sparse",
    "compiler",
]

default-members = ["zluda", "zluda_ml", "zluda_inject", "zluda_redirect", "compiler"]

[profile.release-lto]
inherits = "release"
codegen-units = 1
lto = true

# By default (even in dev) we build LLVM in Release (opt-level is controlled
# by cmake). That's because LLVM in Debug is excruciatingly slow and makes any
# kind of debugging impossible. This profile is a special configuration for when
# you want to build LLVM in Debug.
[profile.dev-llvm]
inherits = "dev"

[profile.dev-llvm.package.xtask]
opt-level = 2

[profile.dev.package.xtask]
opt-level = 2

[patch.crates-io]
highs-sys = { path = "ext/highs-sys" }


================================================
FILE: LICENSE-APACHE
================================================
                              Apache License
                        Version 2.0, January 2004
                     http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

   "License" shall mean the terms and conditions for use, reproduction,
   and distribution as defined by Sections 1 through 9 of this document.

   "Licensor" shall mean the copyright owner or entity authorized by
   the copyright owner that is granting the License.

   "Legal Entity" shall mean the union of the acting entity and all
   other entities that control, are controlled by, or are under common
   control with that entity. For the purposes of this definition,
   "control" means (i) the power, direct or indirect, to cause the
   direction or management of such entity, whether by contract or
   otherwise, or (ii) ownership of fifty percent (50%) or more of the
   outstanding shares, or (iii) beneficial ownership of such entity.

   "You" (or "Your") shall mean an individual or Legal Entity
   exercising permissions granted by this License.

   "Source" form shall mean the preferred form for making modifications,
   including but not limited to software source code, documentation
   source, and configuration files.

   "Object" form shall mean any form resulting from mechanical
   transformation or translation of a Source form, including but
   not limited to compiled object code, generated documentation,
   and conversions to other media types.

   "Work" shall mean the work of authorship, whether in Source or
   Object form, made available under the License, as indicated by a
   copyright notice that is included in or attached to the work
   (an example is provided in the Appendix below).

   "Derivative Works" shall mean any work, whether in Source or Object
   form, that is based on (or derived from) the Work and for which the
   editorial revisions, annotations, elaborations, or other modifications
   represent, as a whole, an original work of authorship. For the purposes
   of this License, Derivative Works shall not include works that remain
   separable from, or merely link (or bind by name) to the interfaces of,
   the Work and Derivative Works thereof.

   "Contribution" shall mean any work of authorship, including
   the original version of the Work and any modifications or additions
   to that Work or Derivative Works thereof, that is intentionally
   submitted to Licensor for inclusion in the Work by the copyright owner
   or by an individual or Legal Entity authorized to submit on behalf of
   the copyright owner. For the purposes of this definition, "submitted"
   means any form of electronic, verbal, or written communication sent
   to the Licensor or its representatives, including but not limited to
   communication on electronic mailing lists, source code control systems,
   and issue tracking systems that are managed by, or on behalf of, the
   Licensor for the purpose of discussing and improving the Work, but
   excluding communication that is conspicuously marked or otherwise
   designated in writing by the copyright owner as "Not a Contribution."

   "Contributor" shall mean Licensor and any individual or Legal Entity
   on behalf of whom a Contribution has been received by Licensor and
   subsequently incorporated within the Work.

2. Grant of Copyright License. Subject to the terms and conditions of
   this License, each Contributor hereby grants to You a perpetual,
   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
   copyright license to reproduce, prepare Derivative Works of,
   publicly display, publicly perform, sublicense, and distribute the
   Work and such Derivative Works in Source or Object form.

3. Grant of Patent License. Subject to the terms and conditions of
   this License, each Contributor hereby grants to You a perpetual,
   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
   (except as stated in this section) patent license to make, have made,
   use, offer to sell, sell, import, and otherwise transfer the Work,
   where such license applies only to those patent claims licensable
   by such Contributor that are necessarily infringed by their
   Contribution(s) alone or by combination of their Contribution(s)
   with the Work to which such Contribution(s) was submitted. If You
   institute patent litigation against any entity (including a
   cross-claim or counterclaim in a lawsuit) alleging that the Work
   or a Contribution incorporated within the Work constitutes direct
   or contributory patent infringement, then any patent licenses
   granted to You under this License for that Work shall terminate
   as of the date such litigation is filed.

4. Redistribution. You may reproduce and distribute copies of the
   Work or Derivative Works thereof in any medium, with or without
   modifications, and in Source or Object form, provided that You
   meet the following conditions:

   (a) You must give any other recipients of the Work or
       Derivative Works a copy of this License; and

   (b) You must cause any modified files to carry prominent notices
       stating that You changed the files; and

   (c) You must retain, in the Source form of any Derivative Works
       that You distribute, all copyright, patent, trademark, and
       attribution notices from the Source form of the Work,
       excluding those notices that do not pertain to any part of
       the Derivative Works; and

   (d) If the Work includes a "NOTICE" text file as part of its
       distribution, then any Derivative Works that You distribute must
       include a readable copy of the attribution notices contained
       within such NOTICE file, excluding those notices that do not
       pertain to any part of the Derivative Works, in at least one
       of the following places: within a NOTICE text file distributed
       as part of the Derivative Works; within the Source form or
       documentation, if provided along with the Derivative Works; or,
       within a display generated by the Derivative Works, if and
       wherever such third-party notices normally appear. The contents
       of the NOTICE file are for informational purposes only and
       do not modify the License. You may add Your own attribution
       notices within Derivative Works that You distribute, alongside
       or as an addendum to the NOTICE text from the Work, provided
       that such additional attribution notices cannot be construed
       as modifying the License.

   You may add Your own copyright statement to Your modifications and
   may provide additional or different license terms and conditions
   for use, reproduction, or distribution of Your modifications, or
   for any such Derivative Works as a whole, provided Your use,
   reproduction, and distribution of the Work otherwise complies with
   the conditions stated in this License.

5. Submission of Contributions. Unless You explicitly state otherwise,
   any Contribution intentionally submitted for inclusion in the Work
   by You to the Licensor shall be under the terms and conditions of
   this License, without any additional terms or conditions.
   Notwithstanding the above, nothing herein shall supersede or modify
   the terms of any separate license agreement you may have executed
   with Licensor regarding such Contributions.

6. Trademarks. This License does not grant permission to use the trade
   names, trademarks, service marks, or product names of the Licensor,
   except as required for reasonable and customary use in describing the
   origin of the Work and reproducing the content of the NOTICE file.

7. Disclaimer of Warranty. Unless required by applicable law or
   agreed to in writing, Licensor provides the Work (and each
   Contributor provides its Contributions) on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
   implied, including, without limitation, any warranties or conditions
   of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
   PARTICULAR PURPOSE. You are solely responsible for determining the
   appropriateness of using or redistributing the Work and assume any
   risks associated with Your exercise of permissions under this License.

8. Limitation of Liability. In no event and under no legal theory,
   whether in tort (including negligence), contract, or otherwise,
   unless required by applicable law (such as deliberate and grossly
   negligent acts) or agreed to in writing, shall any Contributor be
   liable to You for damages, including any direct, indirect, special,
   incidental, or consequential damages of any character arising as a
   result of this License or out of the use or inability to use the
   Work (including but not limited to damages for loss of goodwill,
   work stoppage, computer failure or malfunction, or any and all
   other commercial damages or losses), even if such Contributor
   has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing
   the Work or Derivative Works thereof, You may choose to offer,
   and charge a fee for, acceptance of support, warranty, indemnity,
   or other liability obligations and/or rights consistent with this
   License. However, in accepting such obligations, You may act only
   on Your own behalf and on Your sole responsibility, not on behalf
   of any other Contributor, and only if You agree to indemnify,
   defend, and hold each Contributor harmless for any liability
   incurred by, or claims asserted against, such Contributor by reason
   of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS


================================================
FILE: LICENSE-MIT
================================================
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:

The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.


================================================
FILE: README.md
================================================
ZLUDA is a drop-in replacement for CUDA on non-NVIDIA GPUs. ZLUDA allows running unmodified CUDA applications using non-NVIDIA GPUs with near-native performance.

<div align="center">

<!-- 80x28 104.75x28  62x28-->
[<img src="https://img.shields.io/badge/quick start-green?style=for-the-badge&logo=readthedocs&logoColor=white" width="267.5" height="56">](https://zluda.readthedocs.io) [<img src="https://img.shields.io/badge/Discord-%235865F2.svg?style=for-the-badge&logo=discord&logoColor=white" width="209.5" height="56">](https://discord.gg/sg6BNzXuc7) [<img src="https://img.shields.io/badge/news-red?style=for-the-badge&logo=book&logoColor=white" width="124" height="56">](https://vosen.github.io/ZLUDA/)

</div>


================================================
FILE: compiler/Cargo.toml
================================================
[package]
name = "compiler"
description = "ZLUDA offline compiler"
version = "0.0.0"
authors = ["Joëlle van Essen <joelle@v-essen.nl>"]
edition = "2021"

[[bin]]
name = "zoc"
path = "src/main.rs"

[dependencies]
bpaf = { version = "0.9.19", features = ["derive", "bright-color"] }
llvm_zluda = { path = "../llvm_zluda" }
ptx = { path = "../ptx" }
ptx_parser = { path = "../ptx_parser" }
libloading = "0.8"
thiserror = "2.0.12"

[package.metadata.zluda]
debug_only = true


================================================
FILE: compiler/src/error.rs
================================================
use ptx::TranslateError;
use ptx_parser::PtxError;
use std::ffi::FromBytesUntilNulError;
use std::io;
use std::str::Utf8Error;

/// Error type returned by the offline compiler.
///
/// Variants marked `#[from]` let `?` convert the wrapped error directly.
/// `GenericError` carries a pre-formatted message and, optionally, the
/// underlying error it was built from.
#[derive(Debug, thiserror::Error)]
pub enum CompilerError {
    /// Raw numeric status code reported by a HIP call.
    #[error("HIP error code: {0:?}")]
    HipError(u32),
    /// Failure reported by `libloading` (opening a library or resolving a symbol).
    #[error(transparent)]
    Libloading(#[from] libloading::Error),
    /// Any `std::io::Error`.
    #[error(transparent)]
    IoError(#[from] io::Error),
    /// Bytes that were expected to be UTF-8 but were not.
    #[error(transparent)]
    Utf8Error(#[from] Utf8Error),
    /// A byte buffer that contained no NUL terminator.
    #[error(transparent)]
    FromBytesUntilNulError(#[from] FromBytesUntilNulError),
    /// Free-form error: `message` is what gets displayed, `cause` (if any) is
    /// exposed as the error source.
    #[error("{message}")]
    GenericError {
        #[source]
        cause: Option<Box<dyn std::error::Error>>,
        message: String,
    },
}

/// Collapses a batch of PTX parser errors into a single `GenericError`
/// whose message has one `PtxError::<variant>: <detail>` line per error.
impl From<Vec<PtxError<'_>>> for CompilerError {
    fn from(causes: Vec<PtxError>) -> Self {
        let mut lines: Vec<String> = Vec::with_capacity(causes.len());
        for error in causes.iter() {
            // Unrecognized statements/directives are reported by their
            // offending text; everything else by its own Display output.
            let detail = match error {
                PtxError::UnrecognizedStatement(value)
                | PtxError::UnrecognizedDirective(value) => value.to_string(),
                other => other.to_string(),
            };
            lines.push(format!("PtxError::{}: {}", error.as_ref(), detail));
        }
        CompilerError::GenericError {
            cause: None,
            message: lines.join("\n"),
        }
    }
}

/// Wraps a PTX translation error, keeping it as the error source and using
/// its variant name in the displayed message.
impl From<TranslateError> for CompilerError {
    fn from(cause: TranslateError) -> Self {
        // The message must be built before `cause` is moved into the box.
        let message = format!("PTX TranslateError::{}", cause.as_ref());
        let boxed: Box<dyn std::error::Error> = Box::new(cause);
        CompilerError::GenericError {
            cause: Some(boxed),
            message,
        }
    }
}

/// Turns a plain message into a `GenericError` with no underlying cause.
impl From<String> for CompilerError {
    fn from(message: String) -> Self {
        let cause = None;
        Self::GenericError { cause, message }
    }
}


================================================
FILE: compiler/src/main.rs
================================================
use bpaf::Bpaf;
use error::CompilerError;
use std::ffi::CStr;
use std::fs::{self, File};
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::process::ExitCode;
use std::str;
use std::time::Instant;
use std::{env, mem};

mod error;

/// Architecture used when `--arch` is not given and the GPU architecture
/// cannot be obtained from the HIP runtime.
const DEFAULT_ARCH: &str = "gfx1100";

// Command-line options of the offline compiler, parsed by bpaf.
// NOTE: the `///` comments on the fields below are picked up by the bpaf
// derive as user-visible help text, so maintainer notes here use `//`.
#[derive(Debug, Clone, Bpaf)]
#[bpaf(options, version)]
pub struct Options {
    // When given, the directory is created if missing; otherwise outputs are
    // placed next to the input file (see `main_core`).
    #[bpaf(argument("output-dir"))]
    /// Output directory
    output_dir: Option<PathBuf>,

    // When absent, `main_core` asks the HIP runtime for the architecture and
    // falls back to `DEFAULT_ARCH` if that fails.
    #[bpaf(long("arch"))]
    /// Target GPU architecture
    arch: Option<String>,

    // Selects the unchecked parser entry point in `ptx_to_llvm`.
    #[bpaf(long("ignore-errors"))]
    /// Try to ignore errors. This will try and produce output even if there are
    /// parsing errors (e.g. an unimplemented instruction)
    ignore_errors: bool,

    // Path of the input PTX file; its file name is the base for output names.
    #[bpaf(positional("filename"))]
    /// PTX file
    ptx_path: String,
}

/// Entry point: runs `main_core` and maps its result to a process exit code,
/// printing any error to stderr.
fn main() -> ExitCode {
    match main_core() {
        Ok(()) => ExitCode::SUCCESS,
        Err(e) => {
            eprintln!("Error: {}", e);
            ExitCode::FAILURE
        }
    }
}

fn main_core() -> Result<(), CompilerError> {
    let opts = options().run();

    let ptx_path = Path::new(&opts.ptx_path).to_path_buf();
    let filename_base = ptx_path
        .file_name()
        .map(|osstr| osstr.to_str().unwrap_or("output"))
        .unwrap_or("output");

    let mut output_path = match opts.output_dir {
        Some(value) => {
            std::fs::create_dir_all(&value)?;
            value
        }
        None => match ptx_path.parent() {
            Some(dir) => dir.to_path_buf(),
            None => env::current_dir()?,
        },
    };
    output_path.push(filename_base);

    let arch: String = match opts.arch {
        Some(s) => s,
        None => (|| {
            let runtime = hip::Runtime::load()?;
            runtime.init()?;
            get_gpu_arch(&runtime)
        })()
        .unwrap_or_else(|_| DEFAULT_ARCH.to_owned()),
    };

    let ptx = fs::read(&ptx_path).map_err(CompilerError::from)?;
    let ptx = str::from_utf8(&ptx).map_err(CompilerError::from)?;
    let llvm = ptx_to_llvm(opts.ignore_errors, ptx).map_err(CompilerError::from)?;

    write_to_file(&llvm.llvm_ir, output_path.with_extension("ll").as_path())?;

    let compiler_hook = |bytes: &Vec<u8>, extension: String| {
        let output_path = output_path.with_extension(extension);
        write_to_file(bytes, &output_path).unwrap();
    };

    let mut start = Instant::now();
    llvm_zluda::compile(
        &llvm.context,
        &arch,
        llvm.main,
        &llvm.linked_bitcode,
        llvm.attributes,
        Some(&compiler_hook),
    )?;
    report_pass_time("compile_bitcode", &mut start);

    Ok(())
}

/// Parses `ptx` and lowers it to LLVM, returning the pieces `main_core` needs.
///
/// With `ignore_errors` set, the unchecked parser entry point is used;
/// otherwise parser errors abort with a `CompilerError`. The time of each
/// translation pass is printed through `report_pass_time`.
fn ptx_to_llvm(ignore_errors: bool, ptx: &str) -> Result<LLVMArtifacts, CompilerError> {
    let ast = match ignore_errors {
        true => ptx_parser::parse_module_unchecked(ptx),
        false => ptx_parser::parse_module_checked(ptx)?,
    };
    // The timer starts after parsing, so parse time is not reported.
    let mut start = Instant::now();
    // NOTE(review): hard-coded clock rate; unit and origin are not visible here.
    let attributes = ptx::Attributes {
        clock_rate: 2124000,
    };
    let module = ptx::to_llvm_module(ast, attributes, |pass| {
        report_pass_time(pass, &mut start);
    })?;
    // Both derived buffers are produced while `module` is still intact,
    // before its fields are moved out below.
    let llvm_ir = module.llvm_ir.print_module_to_string().to_bytes().to_vec();
    let linked_bitcode = module.linked_bitcode().to_vec();
    Ok(LLVMArtifacts {
        context: module.context,
        main: module.llvm_ir,
        linked_bitcode,
        attributes: module.attributes_ir,
        llvm_ir,
    })
}

/// Prints how long `pass` took since `start`, then resets `start` to now so
/// the next call measures the following pass.
fn report_pass_time(pass: &str, start: &mut Instant) {
    println!("Pass {:?} took {:?}", pass, start.elapsed());
    *start = Instant::now();
}

/// Everything `ptx_to_llvm` hands back to `main_core`.
///
/// Fields are dropped in declaration order, so `context` is dropped after
/// both modules.
/// NOTE(review): presumably the modules must not outlive their context;
/// confirm before reordering fields.
struct LLVMArtifacts {
    /// Main LLVM module produced from the PTX.
    main: llvm_zluda::utils::Module,
    /// Module taken from `attributes_ir` of the translation result.
    attributes: llvm_zluda::utils::Module,
    /// LLVM context taken from the translation result.
    context: llvm_zluda::utils::Context,
    /// Copy of the bytes returned by `linked_bitcode()` on the translation result.
    linked_bitcode: Vec<u8>,
    /// Textual form of `main` (from `print_module_to_string`), written out as the `.ll` file.
    llvm_ir: Vec<u8>,
}

/// Returns the `gcnArchName` of HIP device 0 as an owned string.
///
/// # Errors
/// Fails if the HIP property query fails, if the name buffer contains no NUL
/// terminator, or if the name is not valid UTF-8.
fn get_gpu_arch(runtime: &hip::Runtime) -> Result<String, CompilerError> {
    // SAFETY: the struct consists only of integers, integer arrays and raw
    // pointers, for all of which the all-zero bit pattern is a valid value.
    let mut dev_props: hip::hipDeviceProp_tR0600 = unsafe { mem::zeroed() };
    runtime.device_get_properties(&mut dev_props, 0)?;
    // Search for the terminator only inside the fixed-size buffer. Unlike
    // `CStr::from_ptr`, this cannot read past the array if the runtime did
    // not NUL-terminate the name; it reports an error instead.
    let name_bytes: Vec<u8> = dev_props.gcnArchName.iter().map(|&c| c as u8).collect();
    let gcn_arch_name = CStr::from_bytes_until_nul(&name_bytes)?;
    Ok(gcn_arch_name.to_str()?.to_string())
}

/// Creates (or truncates) `path`, writes `content` to it and logs the path to stdout.
///
/// # Errors
/// Returns any I/O error from creating, writing or flushing the file.
fn write_to_file(content: &[u8], path: &Path) -> io::Result<()> {
    let mut file = File::create(path)?;
    file.write_all(content)?;
    file.flush()?;
    // `display()` never fails, whereas `to_str().unwrap()` would panic on a
    // non-UTF-8 path even though the file has already been written.
    println!("Wrote to {}", path.display());
    Ok(())
}

mod hip {
    use crate::error::CompilerError;

    /// Handle to the dynamically loaded HIP runtime library.
    ///
    /// The library is opened at run time instead of being linked, so that the
    /// compiler still works on systems with no HIP driver installed.
    pub struct Runtime(libloading::Library);

    impl Runtime {
        /// Maps a HIP status code to a `Result`: zero is success, any other
        /// value becomes `CompilerError::HipError`.
        fn hip_check(err: u32) -> Result<(), CompilerError> {
            if err == 0 {
                Ok(())
            } else {
                Err(CompilerError::HipError(err))
            }
        }

        /// Opens the HIP runtime, trying the version 7 library name first and
        /// the version 6 name second. If both fail, the error from the second
        /// attempt is returned.
        pub fn load() -> Result<Self, CompilerError> {
            // Names carry an explicit trailing NUL, exactly as passed to libloading.
            #[cfg(windows)]
            const CANDIDATES: [&str; 2] = ["amdhip64_7.dll\0", "amdhip64_6.dll\0"];
            #[cfg(unix)]
            const CANDIDATES: [&str; 2] = ["libamdhip64.so.7\0", "libamdhip64.so.6\0"];
            let [newer, older] = CANDIDATES;
            let library = match unsafe { libloading::Library::new(newer) } {
                Ok(lib) => lib,
                Err(_) => unsafe { libloading::Library::new(older)? },
            };
            Ok(Self(library))
        }

        /// Calls `hipInit(0)` from the loaded library.
        pub fn init(&self) -> Result<(), CompilerError> {
            type HipInitFn = unsafe extern "C" fn(u32) -> u32;
            let status = unsafe {
                let entry: libloading::Symbol<HipInitFn> = self.0.get(b"hipInit\0")?;
                entry(0)
            };
            Self::hip_check(status)
        }

        /// Calls `hipGetDevicePropertiesR0600` to fill `prop` for `device`.
        pub fn device_get_properties(
            &self,
            prop: &mut hipDeviceProp_tR0600,
            device: i32,
        ) -> Result<(), CompilerError> {
            type GetPropertiesFn = unsafe extern "C" fn(*mut hipDeviceProp_tR0600, i32) -> u32;
            let status = unsafe {
                let entry: libloading::Symbol<GetPropertiesFn> =
                    self.0.get(b"hipGetDevicePropertiesR0600\0")?;
                entry(prop, device)
            };
            Self::hip_check(status)
        }
    }

    /// Device property record filled in by `hipGetDevicePropertiesR0600`
    /// (see `Runtime::device_get_properties`).
    ///
    /// `#[repr(C)]` keeps the declared field order and C layout, so fields
    /// must not be reordered, added or removed independently of the HIP header.
    /// NOTE(review): the field comments look like they were carried over from
    /// the HIP C header by a binding generator; confirm against that header.
    #[allow(non_snake_case, non_camel_case_types)]
    #[repr(C)]
    #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
    pub struct hipDeviceProp_tR0600 {
        ///< Device name.
        pub name: [::core::ffi::c_char; 256usize],
        ///< UUID of a device
        pub uuid: hipUUID,
        ///< 8-byte unique identifier. Only valid on windows
        pub luid: [::core::ffi::c_char; 8usize],
        ///< LUID node mask
        pub luidDeviceNodeMask: ::core::ffi::c_uint,
        ///< Size of global memory region (in bytes).
        pub totalGlobalMem: usize,
        ///< Size of shared memory per block (in bytes).
        pub sharedMemPerBlock: usize,
        ///< Registers per block.
        pub regsPerBlock: ::core::ffi::c_int,
        ///< Warp size.
        pub warpSize: ::core::ffi::c_int,
        /**< Maximum pitch in bytes allowed by memory copies
        < pitched memory*/
        pub memPitch: usize,
        ///< Max work items per work group or workgroup max size.
        pub maxThreadsPerBlock: ::core::ffi::c_int,
        ///< Max number of threads in each dimension (XYZ) of a block.
        pub maxThreadsDim: [::core::ffi::c_int; 3usize],
        ///< Max grid dimensions (XYZ).
        pub maxGridSize: [::core::ffi::c_int; 3usize],
        ///< Max clock frequency of the multiProcessors in khz.
        pub clockRate: ::core::ffi::c_int,
        /**< Size of shared constant memory region on the device
        < (in bytes).*/
        pub totalConstMem: usize,
        /**< Major compute capability.  On HCC, this is an approximation and features may
        < differ from CUDA CC.  See the arch feature flags for portable ways to query
        < feature caps.*/
        pub major: ::core::ffi::c_int,
        /**< Minor compute capability.  On HCC, this is an approximation and features may
        < differ from CUDA CC.  See the arch feature flags for portable ways to query
        < feature caps.*/
        pub minor: ::core::ffi::c_int,
        ///< Alignment requirement for textures
        pub textureAlignment: usize,
        ///< Pitch alignment requirement for texture references bound to
        pub texturePitchAlignment: usize,
        ///< Deprecated. Use asyncEngineCount instead
        pub deviceOverlap: ::core::ffi::c_int,
        ///< Number of multi-processors (compute units).
        pub multiProcessorCount: ::core::ffi::c_int,
        ///< Run time limit for kernels executed on the device
        pub kernelExecTimeoutEnabled: ::core::ffi::c_int,
        ///< APU vs dGPU
        pub integrated: ::core::ffi::c_int,
        ///< Check whether HIP can map host memory
        pub canMapHostMemory: ::core::ffi::c_int,
        ///< Compute mode.
        pub computeMode: ::core::ffi::c_int,
        ///< Maximum number of elements in 1D images
        pub maxTexture1D: ::core::ffi::c_int,
        ///< Maximum 1D mipmap texture size
        pub maxTexture1DMipmap: ::core::ffi::c_int,
        ///< Maximum size for 1D textures bound to linear memory
        pub maxTexture1DLinear: ::core::ffi::c_int,
        ///< Maximum dimensions (width, height) of 2D images, in image elements
        pub maxTexture2D: [::core::ffi::c_int; 2usize],
        ///< Maximum number of elements in 2D array mipmap of images
        pub maxTexture2DMipmap: [::core::ffi::c_int; 2usize],
        ///< Maximum 2D tex dimensions if tex are bound to pitched memory
        pub maxTexture2DLinear: [::core::ffi::c_int; 3usize],
        ///< Maximum 2D tex dimensions if gather has to be performed
        pub maxTexture2DGather: [::core::ffi::c_int; 2usize],
        /**< Maximum dimensions (width, height, depth) of 3D images, in image
        < elements*/
        pub maxTexture3D: [::core::ffi::c_int; 3usize],
        ///< Maximum alternate 3D texture dims
        pub maxTexture3DAlt: [::core::ffi::c_int; 3usize],
        ///< Maximum cubemap texture dims
        pub maxTextureCubemap: ::core::ffi::c_int,
        ///< Maximum number of elements in 1D array images
        pub maxTexture1DLayered: [::core::ffi::c_int; 2usize],
        ///< Maximum number of elements in 2D array images
        pub maxTexture2DLayered: [::core::ffi::c_int; 3usize],
        ///< Maximum cubemaps layered texture dims
        pub maxTextureCubemapLayered: [::core::ffi::c_int; 2usize],
        ///< Maximum 1D surface size
        pub maxSurface1D: ::core::ffi::c_int,
        ///< Maximum 2D surface size
        pub maxSurface2D: [::core::ffi::c_int; 2usize],
        ///< Maximum 3D surface size
        pub maxSurface3D: [::core::ffi::c_int; 3usize],
        ///< Maximum 1D layered surface size
        pub maxSurface1DLayered: [::core::ffi::c_int; 2usize],
        ///< Maximum 2D layered surface size
        pub maxSurface2DLayered: [::core::ffi::c_int; 3usize],
        ///< Maximum cubemap surface size
        pub maxSurfaceCubemap: ::core::ffi::c_int,
        ///< Maximum cubemap layered surface size
        pub maxSurfaceCubemapLayered: [::core::ffi::c_int; 2usize],
        ///< Alignment requirement for surface
        pub surfaceAlignment: usize,
        ///< Device can possibly execute multiple kernels concurrently.
        pub concurrentKernels: ::core::ffi::c_int,
        ///< Device has ECC support enabled
        pub ECCEnabled: ::core::ffi::c_int,
        ///< PCI Bus ID.
        pub pciBusID: ::core::ffi::c_int,
        ///< PCI Device ID.
        pub pciDeviceID: ::core::ffi::c_int,
        ///< PCI Domain ID
        pub pciDomainID: ::core::ffi::c_int,
        ///< 1:If device is Tesla device using TCC driver, else 0
        pub tccDriver: ::core::ffi::c_int,
        ///< Number of async engines
        pub asyncEngineCount: ::core::ffi::c_int,
        ///< Does device and host share unified address space
        pub unifiedAddressing: ::core::ffi::c_int,
        ///< Max global memory clock frequency in khz.
        pub memoryClockRate: ::core::ffi::c_int,
        ///< Global memory bus width in bits.
        pub memoryBusWidth: ::core::ffi::c_int,
        ///< L2 cache size.
        pub l2CacheSize: ::core::ffi::c_int,
        ///< Device's max L2 persisting lines in bytes
        pub persistingL2CacheMaxSize: ::core::ffi::c_int,
        ///< Maximum resident threads per multi-processor.
        pub maxThreadsPerMultiProcessor: ::core::ffi::c_int,
        ///< Device supports stream priority
        pub streamPrioritiesSupported: ::core::ffi::c_int,
        ///< Indicates globals are cached in L1
        pub globalL1CacheSupported: ::core::ffi::c_int,
        ///< Locals are cached in L1
        pub localL1CacheSupported: ::core::ffi::c_int,
        ///< Amount of shared memory available per multiprocessor.
        pub sharedMemPerMultiprocessor: usize,
        ///< registers available per multiprocessor
        pub regsPerMultiprocessor: ::core::ffi::c_int,
        ///< Device supports allocating managed memory on this system
        pub managedMemory: ::core::ffi::c_int,
        ///< 1 if device is on a multi-GPU board, 0 if not.
        pub isMultiGpuBoard: ::core::ffi::c_int,
        ///< Unique identifier for a group of devices on same multiboard GPU
        pub multiGpuBoardGroupID: ::core::ffi::c_int,
        ///< Link between host and device supports native atomics
        pub hostNativeAtomicSupported: ::core::ffi::c_int,
        ///< Deprecated. CUDA only.
        pub singleToDoublePrecisionPerfRatio: ::core::ffi::c_int,
        /**< Device supports coherently accessing pageable memory
        < without calling hipHostRegister on it*/
        pub pageableMemoryAccess: ::core::ffi::c_int,
        /**< Device can coherently access managed memory concurrently with
        < the CPU*/
        pub concurrentManagedAccess: ::core::ffi::c_int,
        ///< Is compute preemption supported on the device
        pub computePreemptionSupported: ::core::ffi::c_int,
        /**< Device can access host registered memory with same
        < address as the host*/
        pub canUseHostPointerForRegisteredMem: ::core::ffi::c_int,
        ///< HIP device supports cooperative launch
        pub cooperativeLaunch: ::core::ffi::c_int,
        /**< HIP device supports cooperative launch on multiple
        < devices*/
        pub cooperativeMultiDeviceLaunch: ::core::ffi::c_int,
        ///< Per device max shared mem per block usable by special opt in
        pub sharedMemPerBlockOptin: usize,
        /**< Device accesses pageable memory via the host's
        < page tables*/
        pub pageableMemoryAccessUsesHostPageTables: ::core::ffi::c_int,
        /**< Host can directly access managed memory on the device
        < without migration*/
        pub directManagedMemAccessFromHost: ::core::ffi::c_int,
        ///< Max number of blocks on CU
        pub maxBlocksPerMultiProcessor: ::core::ffi::c_int,
        ///< Max value of access policy window
        pub accessPolicyMaxWindowSize: ::core::ffi::c_int,
        ///< Shared memory reserved by driver per block
        pub reservedSharedMemPerBlock: usize,
        ///< Device supports hipHostRegister
        pub hostRegisterSupported: ::core::ffi::c_int,
        ///< Indicates if device supports sparse hip arrays
        pub sparseHipArraySupported: ::core::ffi::c_int,
        /**< Device supports using the hipHostRegisterReadOnly flag
        < with hipHostRegister*/
        pub hostRegisterReadOnlySupported: ::core::ffi::c_int,
        ///< Indicates external timeline semaphore support
        pub timelineSemaphoreInteropSupported: ::core::ffi::c_int,
        ///< Indicates if device supports hipMallocAsync and hipMemPool APIs
        pub memoryPoolsSupported: ::core::ffi::c_int,
        ///< Indicates device support of RDMA APIs
        pub gpuDirectRDMASupported: ::core::ffi::c_int,
        /**< Bitmask to be interpreted according to
        < hipFlushGPUDirectRDMAWritesOptions*/
        pub gpuDirectRDMAFlushWritesOptions: ::core::ffi::c_uint,
        ///< value of hipGPUDirectRDMAWritesOrdering
        pub gpuDirectRDMAWritesOrdering: ::core::ffi::c_int,
        ///< Bitmask of handle types support with mempool based IPC
        pub memoryPoolSupportedHandleTypes: ::core::ffi::c_uint,
        /**< Device supports deferred mapping HIP arrays and HIP
        < mipmapped arrays*/
        pub deferredMappingHipArraySupported: ::core::ffi::c_int,
        ///< Device supports IPC events
        pub ipcEventSupported: ::core::ffi::c_int,
        ///< Device supports cluster launch
        pub clusterLaunch: ::core::ffi::c_int,
        ///< Indicates device supports unified function pointers
        pub unifiedFunctionPointers: ::core::ffi::c_int,
        ///< CUDA Reserved.
        pub reserved: [::core::ffi::c_int; 63usize],
        ///< Reserved for adding new entries for HIP/CUDA.
        pub hipReserved: [::core::ffi::c_int; 32usize],
        ///< AMD GCN Arch Name. HIP Only.
        pub gcnArchName: [::core::ffi::c_char; 256usize],
        ///< Maximum Shared Memory Per CU. HIP Only.
        pub maxSharedMemoryPerMultiProcessor: usize,
        /**< Frequency in khz of the timer used by the device-side "clock*"
        < instructions.  New for HIP.*/
        pub clockInstructionRate: ::core::ffi::c_int,
        ///< Architectural feature flags.  New for HIP.
        pub arch: hipDeviceArch_t,
        ///< Address of HDP_MEM_COHERENCY_FLUSH_CNTL register
        pub hdpMemFlushCntl: *mut ::core::ffi::c_uint,
        ///< Address of HDP_REG_COHERENCY_FLUSH_CNTL register
        pub hdpRegFlushCntl: *mut ::core::ffi::c_uint,
        /**< HIP device supports cooperative launch on
        < multiple*/
        pub cooperativeMultiDeviceUnmatchedFunc: ::core::ffi::c_int,
        /**< HIP device supports cooperative launch on
        < multiple*/
        pub cooperativeMultiDeviceUnmatchedGridDim: ::core::ffi::c_int,
        /**< HIP device supports cooperative launch on
        < multiple*/
        pub cooperativeMultiDeviceUnmatchedBlockDim: ::core::ffi::c_int,
        /**< HIP device supports cooperative launch on
        < multiple*/
        pub cooperativeMultiDeviceUnmatchedSharedMem: ::core::ffi::c_int,
        ///< 1: if it is a large PCI bar device, else 0
        pub isLargeBar: ::core::ffi::c_int,
        ///< Revision of the GPU in this device
        pub asicRevision: ::core::ffi::c_int,
    }

    /// Architectural feature flags, stored as an opaque bitfield.
    ///
    /// The layout is 3 bytes of bitfield storage plus one explicit padding
    /// byte, with 4-byte alignment. Individual flags are not exposed here.
    #[allow(non_snake_case, non_camel_case_types)]
    #[repr(C)]
    #[repr(align(4))]
    #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
    pub struct hipDeviceArch_t {
        /// Zero-sized marker field; occupies no space.
        pub _bitfield_align_1: [u8; 0],
        /// Raw storage for the packed feature bits.
        pub _bitfield_1: __BindgenBitfieldUnit<[u8; 3usize]>,
        /// Explicit padding byte that rounds the struct up to 4 bytes.
        pub __bindgen_padding_0: u8,
    }

    /// Opaque wrapper around the raw bytes backing a C bitfield.
    ///
    /// Only the storage is declared in this module; no accessors are provided.
    #[repr(C)]
    #[derive(Copy, Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
    pub struct __BindgenBitfieldUnit<Storage> {
        /// Backing bytes of the bitfield.
        storage: Storage,
    }

    /// 16-byte device UUID, laid out as in C.
    #[allow(non_camel_case_types)]
    #[repr(C)]
    #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
    pub struct hipUUID_t {
        /// Raw UUID bytes.
        pub bytes: [::core::ffi::c_char; 16usize],
    }
    /// Alias used by the `uuid` field of `hipDeviceProp_tR0600`.
    #[allow(non_camel_case_types)]
    pub type hipUUID = hipUUID_t;
}


================================================
FILE: cuda_check/Cargo.toml
================================================
[package]
name = "cuda_check"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2021"

[[bin]]
name = "cuda_check"
path = "src/main.rs"

[dependencies]
cuda_types = { path = "../cuda_types" }
zluda_windows = { path = "../zluda_windows" }
bpaf = { version = "0.9.19", features = ["derive", "bright-color"] }
owo-colors = { version = "4", features = ["supports-colors"] }
rand = "0.9.2"
libloading = "0.8"

[target.'cfg(windows)'.dependencies]
windows = { version = "0.62.2", features = ["Win32_Foundation"] }


[package.metadata.zluda]
windows_only = true


================================================
FILE: cuda_check/src/main.rs
================================================
// The actual tool lives in the `win` module, which is only compiled on Windows.
#[cfg(windows)]
mod win;

/// Windows entry point: delegates to `win::main`.
#[cfg(windows)]
fn main() {
    win::main()
}

/// On every other platform the binary builds but does nothing.
#[cfg(not(windows))]
fn main() {}


================================================
FILE: cuda_check/src/win.rs
================================================
use bpaf::{construct, pure, Bpaf, Parser};
use owo_colors::{OwoColorize, Stream};
use rand::seq::SliceRandom;
use std::{ffi::OsString, mem};
use windows::Win32::Foundation::HMODULE;
use zluda_windows::{get_module_path_utf16, LibraryInfo};

// Command-line options of cuda_check, parsed by bpaf.
// Maintainer notes use `//` here because the bpaf derive turns `///` doc
// comments into user-visible help text.
#[derive(Debug, Bpaf)]
#[bpaf(options)]
struct Options {
    // Libraries selected on the command line; built by the external
    // `libraries()` parser. Empty means "check all" (see `main`).
    #[bpaf(external)]
    libraries: Vec<&'static LibraryInfo>,
    // When set, the first selected library keeps its position and only the
    // rest are shuffled (see `main`).
    #[bpaf(switch)]
    driver_first: bool,
}

/// Builds a parser with one `--<short_name>` switch per entry of
/// `zluda_windows::LIBRARIES`.
///
/// The parser yields the libraries whose switch was given, in the order they
/// appear in `LIBRARIES`.
fn libraries() -> impl Parser<Vec<&'static LibraryInfo>> {
    // Start from a parser that yields an empty list, then chain one switch
    // per library onto it.
    let mut parser = Parser::boxed(pure(Vec::with_capacity(zluda_windows::LIBRARIES.len())));
    for library in zluda_windows::LIBRARIES.iter() {
        let help_text = format!("Look for {}", library.ascii_name);
        let flag = bpaf::long(library.short_name).help(&*help_text).switch();
        parser = construct!(parser, flag)
            .map(move |(mut selected, enabled)| {
                if enabled {
                    selected.push(library);
                }
                selected
            })
            .boxed();
    }
    parser
}

pub fn main() {
    let mut opts = options().run();
    if opts.libraries.is_empty() {
        opts.libraries = zluda_windows::LIBRARIES.iter().collect();
    }
    let mut lib_set = opts.libraries;
    if !opts.driver_first {
        lib_set.shuffle(&mut rand::rng());
    } else {
        let (_, remainder) = lib_set.split_first_mut().unwrap();
        remainder.shuffle(&mut rand::rng());
    }
    for lib in lib_set {
        print_result(lib.short_name, unsafe { try_load_library(lib) });
    }
}

/// Prints a single result line for the library called `short_name`.
///
/// The name is left-aligned in a 10 character column. A successful check
/// prints `OK` (green when the terminal supports color), followed by the
/// backend path in parentheses when one is known. A failed check prints the
/// debug representation of the error (red when color is supported).
fn print_result(short_name: &str, lib: Result<Option<OsString>, Error>) {
    print!("{:<10}: ", short_name);
    match lib {
        Err(err) => {
            let message = format!("ERROR: {:?}", err);
            println!(
                "{}",
                message.if_supports_color(Stream::Stdout, |text| text.red())
            );
        }
        Ok(location) => {
            if let Some(path) = location {
                println!(
                    "{} ({})",
                    "OK".if_supports_color(Stream::Stdout, |text| text.green()),
                    path.display()
                );
            } else {
                println!(
                    "{}",
                    "OK".if_supports_color(Stream::Stdout, |text| text.green())
                );
            }
        }
    }
}

/// Loads the DLL described by `lib` and runs the matching smoke test on it.
///
/// Libraries flagged `in_system32` are loaded by bare file name. All other
/// libraries are loaded from `%CUDA_PATH%\bin\x64\<name>` when `CUDA_PATH` is
/// set, and by bare file name otherwise.
///
/// Returns whatever the library-specific check returns, `Error::Loading` when
/// the DLL cannot be loaded, or `Error::Initialization` when no check exists
/// for `lib.short_name`.
///
/// # Safety
/// Loading a DLL runs its initialization code and the checks call into
/// foreign functions through signatures that are assumed, not verified.
unsafe fn try_load_library(lib: &LibraryInfo) -> Result<Option<OsString>, Error> {
    let library = if lib.in_system32 {
        libloading::Library::new(lib.ascii_name)?
    } else {
        // `var_os` rather than `var`: `var` reports an error for values that
        // are not valid Unicode, which would make a perfectly usable
        // CUDA_PATH silently fall through to the default DLL search.
        match std::env::var_os("CUDA_PATH") {
            Some(cuda_path) => {
                let path = std::path::Path::new(&cuda_path)
                    .join("bin")
                    .join("x64")
                    .join(lib.ascii_name);
                libloading::Library::new(path)?
            }
            None => libloading::Library::new(lib.ascii_name)?,
        }
    };
    // Different major versions of a library share the same entry points, so
    // they share a check as well.
    match lib.short_name {
        "nvcuda" => check_cuda(library),
        "nvml" => check_nvml(library),
        "cudnn8" => check_cudnn8(library),
        "cudnn9" => check_cudnn9(library),
        "cublas11" | "cublas12" | "cublas13" => check_cublas(library),
        "cublaslt11" | "cublaslt12" | "cublaslt13" => check_cublaslt(library),
        "cusparse10" | "cusparse11" | "cusparse12" => check_cusparse(library),
        "cufft10" | "cufft11" | "cufft12" => check_cufft(library),
        _ => Err(Error::Initialization(
            format!("Library check not implemented for {}", lib.short_name),
            0,
        )),
    }
}

/// Returns the on-disk path of the module named `lib` if it is already loaded
/// into the current process, and `None` otherwise.
///
/// # Safety
/// Converts a library handle to and from its raw form; the raw handle must
/// not be used after this function returns.
unsafe fn path_for_loaded_lib(lib: &'static str) -> Option<OsString> {
    let raw_handle = match libloading::os::windows::Library::open_already_loaded(lib) {
        Ok(loaded) => loaded.into_raw(),
        Err(_) => return None,
    };
    let module_path = get_module_path_utf16(HMODULE(raw_handle as _));
    // Re-wrap the raw handle and drop it right away so the handle obtained
    // above is released again.
    drop(libloading::os::windows::Library::from_raw(raw_handle));
    Some(module_path)
}

unsafe fn check_cufft(library: libloading::Library) -> Result<Option<OsString>, Error> {
    let hip_path = || path_for_loaded_lib("hipfft.dll");
    let cufft_create = library.get::<extern "system" fn(
        handle: *mut cuda_types::cufft::cufftHandle,
    ) -> cuda_types::cufft::cufftResult>(b"cufftCreate\0")?;
    let cufft_destroy = library.get::<extern "system" fn(
        handle: cuda_types::cufft::cufftHandle,
    ) -> cuda_types::cufft::cufftResult>(b"cufftDestroy\0")?;
    let mut handle = mem::zeroed();
    match cufft_create(&mut handle) {
        Ok(()) => {}
        Err(cuda_types::cufft::cufftError_t::NOT_SUPPORTED) => {
            return Ok(hip_path());
        }
        Err(err) => {
            return Err(Error::Initialization(
                "cufftCreate".to_string(),
                err.0.get() as usize,
            ));
        }
    }
    let result = hip_path();
    cufft_destroy(handle)
        .map_err(|err| Error::Initialization("cufftDestroy".to_string(), err.0.get() as usize))?;
    Ok(result)
}

unsafe fn check_cublas(library: libloading::Library) -> Result<Option<OsString>, Error> {
    let hip_path = || path_for_loaded_lib("rocblas.dll");
    let cublas_create = library.get::<extern "system" fn(
        handle: *mut cuda_types::cublas::cublasHandle_t,
    ) -> cuda_types::cublas::cublasStatus_t>(b"cublasCreate_v2\0")?;
    let cublas_destroy =
        library.get::<extern "system" fn(
            handle: cuda_types::cublas::cublasHandle_t,
        ) -> cuda_types::cublas::cublasStatus_t>(b"cublasDestroy_v2\0")?;
    let mut handle = mem::zeroed();
    cublas_create(&mut handle).map_err(|err| {
        Error::Initialization("cublasCreate_v2".to_string(), err.0.get() as usize)
    })?;
    let result = hip_path();
    cublas_destroy(handle).map_err(|err| {
        Error::Initialization("cublasDestroy_v2".to_string(), err.0.get() as usize)
    })?;
    Ok(result)
}

unsafe fn check_cusparse(library: libloading::Library) -> Result<Option<OsString>, Error> {
    let hip_path = || path_for_loaded_lib("rocsparse.dll");
    let cusparse_create =
        library.get::<extern "system" fn(
            handle: *mut cuda_types::cusparse::cusparseHandle_t,
        ) -> cuda_types::cusparse::cusparseStatus_t>(b"cusparseCreate\0")?;
    let cusparse_destroy =
        library.get::<extern "system" fn(
            handle: cuda_types::cusparse::cusparseHandle_t,
        ) -> cuda_types::cusparse::cusparseStatus_t>(b"cusparseDestroy\0")?;
    let mut handle = mem::zeroed();
    match cusparse_create(&mut handle) {
        Ok(()) => {}
        Err(cuda_types::cusparse::cusparseError_t::NOT_SUPPORTED) => {
            return Ok(hip_path());
        }
        Err(err) => {
            return Err(Error::Initialization(
                "cusparseCreate".to_string(),
                err.0.get() as usize,
            ));
        }
    }
    let result = hip_path();
    cusparse_destroy(handle).map_err(|err| {
        Error::Initialization("cusparseDestroy".to_string(), err.0.get() as usize)
    })?;
    Ok(result)
}

unsafe fn check_cublaslt(library: libloading::Library) -> Result<Option<OsString>, Error> {
    let hip_path =
        || path_for_loaded_lib("hipblaslt.dll").or_else(|| path_for_loaded_lib("libhipblaslt.dll"));
    let cublaslt_create =
        library.get::<extern "system" fn(
            handle: *mut cuda_types::cublaslt::cublasLtHandle_t,
        ) -> cuda_types::cublas::cublasStatus_t>(b"cublasLtCreate\0")?;
    let cublaslt_destroy =
        library.get::<extern "system" fn(
            handle: cuda_types::cublaslt::cublasLtHandle_t,
        ) -> cuda_types::cublas::cublasStatus_t>(b"cublasLtDestroy\0")?;
    let mut handle = mem::zeroed();
    cublaslt_create(&mut handle)
        .map_err(|err| Error::Initialization("cublasLtCreate".to_string(), err.0.get() as usize))?;
    let result = hip_path();
    cublaslt_destroy(handle).map_err(|err| {
        Error::Initialization("cublasLtDestroy".to_string(), err.0.get() as usize)
    })?;
    Ok(result)
}

unsafe fn check_cuda(library: libloading::Library) -> Result<Option<OsString>, Error> {
    let cu_init = library
        .get::<extern "system" fn(::core::ffi::c_uint) -> cuda_types::cuda::CUresult>(
            b"cuInit\0",
        )?;
    cu_init(0).map_err(|err| Error::Initialization("cuInit".to_string(), err.0.get() as usize))?;
    Ok(path_for_loaded_lib("amdhip64_7.dll").or_else(|| path_for_loaded_lib("amdhip64_6.dll")))
}

unsafe fn check_nvml(library: libloading::Library) -> Result<Option<OsString>, Error> {
    use cuda_types::nvml::nvmlReturn_tConsts;
    let nvml_init =
        library.get::<extern "system" fn() -> cuda_types::nvml::nvmlReturn_t>(b"nvmlInit_v2\0")?;
    match nvml_init() {
        Ok(()) | cuda_types::nvml::nvmlReturn_t::ERROR_NOT_SUPPORTED => {
            Ok(path_for_loaded_lib("rocm_smi64.dll"))
        }
        Err(err) => Err(Error::Initialization(
            "nvmlInit_v2".to_string(),
            err.0.get() as usize,
        )),
    }
}

unsafe fn check_cudnn8(library: libloading::Library) -> Result<Option<OsString>, Error> {
    let hip_path = || path_for_loaded_lib("MIOpen.dll");
    let cudnn_create = library.get::<extern "system" fn(
        handle: *mut cuda_types::cudnn8::cudnnHandle_t,
    ) -> cuda_types::cudnn8::cudnnStatus_t>(b"cudnnCreate\0")?;
    let cudnn_destroy = library.get::<extern "system" fn(
        handle: cuda_types::cudnn8::cudnnHandle_t,
    ) -> cuda_types::cudnn8::cudnnStatus_t>(b"cudnnDestroy\0")?;
    let mut handle = mem::zeroed();
    cudnn_create(&mut handle)
        .map_err(|err| Error::Initialization("cudnnCreate".to_string(), err.0.get() as usize))?;
    let result = hip_path();
    cudnn_destroy(handle)
        .map_err(|err| Error::Initialization("cudnnDestroy".to_string(), err.0.get() as usize))?;
    Ok(result)
}

unsafe fn check_cudnn9(library: libloading::Library) -> Result<Option<OsString>, Error> {
    let hip_path = || path_for_loaded_lib("MIOpen.dll");
    let cudnn_create = library.get::<extern "system" fn(
        handle: *mut cuda_types::cudnn9::cudnnHandle_t,
    ) -> cuda_types::cudnn9::cudnnStatus_t>(b"cudnnCreate\0")?;
    let cudnn_destroy = library.get::<extern "system" fn(
        handle: cuda_types::cudnn9::cudnnHandle_t,
    ) -> cuda_types::cudnn9::cudnnStatus_t>(b"cudnnDestroy\0")?;
    let mut handle = mem::zeroed();
    cudnn_create(&mut handle)
        .map_err(|err| Error::Initialization("cudnnCreate".to_string(), err.0.get() as usize))?;
    let result = hip_path();
    cudnn_destroy(handle)
        .map_err(|err| Error::Initialization("cudnnDestroy".to_string(), err.0.get() as usize))?;
    Ok(result)
}

/// Failure modes of a single library check.
///
/// The payloads are only ever shown through the derived `Debug` output (see
/// `print_result`), which is why `dead_code` is allowed here.
#[derive(Debug)]
#[allow(dead_code)]
enum Error {
    /// The DLL or one of its symbols could not be loaded.
    Loading(libloading::Error),
    /// A check failed. Holds a message (for failed calls, the name of the
    /// function that failed) and the numeric status code, or `0` when no
    /// check exists for the library.
    Initialization(String, usize),
}

/// Lets `?` turn a `libloading` failure into `Error::Loading`.
impl From<libloading::Error> for Error {
    /// Wraps the loader error without altering it.
    fn from(err: libloading::Error) -> Self {
        Self::Loading(err)
    }
}


================================================
FILE: cuda_macros/.rustfmt.toml
================================================
disable_all_formatting = true


================================================
FILE: cuda_macros/Cargo.toml
================================================
[package]
name = "cuda_macros"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2021"

[dependencies]
quote = "1.0"
syn = { version = "2.0", features = ["full", "visit-mut", "extra-traits"] }
proc-macro2 = "1.0"
rustc-hash = "2.0.0"

[lib]
proc-macro = true


================================================
FILE: cuda_macros/build/wrapper.h
================================================
#define __CUDA_API_VERSION_INTERNAL
#include <cuda.h>
#include <cudaProfiler.h>
#include <cudaGL.h>
#include <cudaEGL.h>
#include <vdpau/vdpau.h>
#include <cudaVDPAU.h>


================================================
FILE: cuda_macros/src/cublas.rs
================================================
// Generated automatically by zluda_bindgen
// DO NOT EDIT MANUALLY
#![allow(warnings)]
extern "system" {
    #[must_use]
    fn cublasCreate_v2(
        handle: *mut cuda_types::cublas::cublasHandle_t,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasDestroy_v2(
        handle: cuda_types::cublas::cublasHandle_t,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasGetVersion_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        version: *mut ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasGetProperty(
        type_: cuda_types::cublas::libraryPropertyType,
        value: *mut ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    fn cublasGetCudartVersion() -> usize;
    #[must_use]
    fn cublasSetWorkspace_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        workspace: *mut ::core::ffi::c_void,
        workspaceSizeInBytes: usize,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSetStream_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        streamId: cuda_types::cublas::cudaStream_t,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasGetStream_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        streamId: *mut cuda_types::cublas::cudaStream_t,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasGetPointerMode_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        mode: *mut cuda_types::cublas::cublasPointerMode_t,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSetPointerMode_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        mode: cuda_types::cublas::cublasPointerMode_t,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasGetAtomicsMode(
        handle: cuda_types::cublas::cublasHandle_t,
        mode: *mut cuda_types::cublas::cublasAtomicsMode_t,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSetAtomicsMode(
        handle: cuda_types::cublas::cublasHandle_t,
        mode: cuda_types::cublas::cublasAtomicsMode_t,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasGetMathMode(
        handle: cuda_types::cublas::cublasHandle_t,
        mode: *mut cuda_types::cublas::cublasMath_t,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSetMathMode(
        handle: cuda_types::cublas::cublasHandle_t,
        mode: cuda_types::cublas::cublasMath_t,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasGetSmCountTarget(
        handle: cuda_types::cublas::cublasHandle_t,
        smCountTarget: *mut ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSetSmCountTarget(
        handle: cuda_types::cublas::cublasHandle_t,
        smCountTarget: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasGetEmulationStrategy(
        handle: cuda_types::cublas::cublasHandle_t,
        emulationStrategy: *mut cuda_types::cublas::cublasEmulationStrategy_t,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSetEmulationStrategy(
        handle: cuda_types::cublas::cublasHandle_t,
        emulationStrategy: cuda_types::cublas::cublasEmulationStrategy_t,
    ) -> cuda_types::cublas::cublasStatus_t;
    fn cublasGetStatusName(
        status: cuda_types::cublas::cublasStatus_t,
    ) -> *const ::core::ffi::c_char;
    fn cublasGetStatusString(
        status: cuda_types::cublas::cublasStatus_t,
    ) -> *const ::core::ffi::c_char;
    #[must_use]
    fn cublasLoggerConfigure(
        logIsOn: ::core::ffi::c_int,
        logToStdOut: ::core::ffi::c_int,
        logToStdErr: ::core::ffi::c_int,
        logFileName: *const ::core::ffi::c_char,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSetLoggerCallback(
        userCallback: cuda_types::cublas::cublasLogCallback,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasGetLoggerCallback(
        userCallback: *mut cuda_types::cublas::cublasLogCallback,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSetVector(
        n: ::core::ffi::c_int,
        elemSize: ::core::ffi::c_int,
        x: *const ::core::ffi::c_void,
        incx: ::core::ffi::c_int,
        devicePtr: *mut ::core::ffi::c_void,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSetVector_64(
        n: i64,
        elemSize: i64,
        x: *const ::core::ffi::c_void,
        incx: i64,
        devicePtr: *mut ::core::ffi::c_void,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasGetVector(
        n: ::core::ffi::c_int,
        elemSize: ::core::ffi::c_int,
        x: *const ::core::ffi::c_void,
        incx: ::core::ffi::c_int,
        y: *mut ::core::ffi::c_void,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasGetVector_64(
        n: i64,
        elemSize: i64,
        x: *const ::core::ffi::c_void,
        incx: i64,
        y: *mut ::core::ffi::c_void,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSetMatrix(
        rows: ::core::ffi::c_int,
        cols: ::core::ffi::c_int,
        elemSize: ::core::ffi::c_int,
        A: *const ::core::ffi::c_void,
        lda: ::core::ffi::c_int,
        B: *mut ::core::ffi::c_void,
        ldb: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSetMatrix_64(
        rows: i64,
        cols: i64,
        elemSize: i64,
        A: *const ::core::ffi::c_void,
        lda: i64,
        B: *mut ::core::ffi::c_void,
        ldb: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasGetMatrix(
        rows: ::core::ffi::c_int,
        cols: ::core::ffi::c_int,
        elemSize: ::core::ffi::c_int,
        A: *const ::core::ffi::c_void,
        lda: ::core::ffi::c_int,
        B: *mut ::core::ffi::c_void,
        ldb: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasGetMatrix_64(
        rows: i64,
        cols: i64,
        elemSize: i64,
        A: *const ::core::ffi::c_void,
        lda: i64,
        B: *mut ::core::ffi::c_void,
        ldb: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSetVectorAsync(
        n: ::core::ffi::c_int,
        elemSize: ::core::ffi::c_int,
        hostPtr: *const ::core::ffi::c_void,
        incx: ::core::ffi::c_int,
        devicePtr: *mut ::core::ffi::c_void,
        incy: ::core::ffi::c_int,
        stream: cuda_types::cublas::cudaStream_t,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSetVectorAsync_64(
        n: i64,
        elemSize: i64,
        hostPtr: *const ::core::ffi::c_void,
        incx: i64,
        devicePtr: *mut ::core::ffi::c_void,
        incy: i64,
        stream: cuda_types::cublas::cudaStream_t,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasGetVectorAsync(
        n: ::core::ffi::c_int,
        elemSize: ::core::ffi::c_int,
        devicePtr: *const ::core::ffi::c_void,
        incx: ::core::ffi::c_int,
        hostPtr: *mut ::core::ffi::c_void,
        incy: ::core::ffi::c_int,
        stream: cuda_types::cublas::cudaStream_t,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasGetVectorAsync_64(
        n: i64,
        elemSize: i64,
        devicePtr: *const ::core::ffi::c_void,
        incx: i64,
        hostPtr: *mut ::core::ffi::c_void,
        incy: i64,
        stream: cuda_types::cublas::cudaStream_t,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSetMatrixAsync(
        rows: ::core::ffi::c_int,
        cols: ::core::ffi::c_int,
        elemSize: ::core::ffi::c_int,
        A: *const ::core::ffi::c_void,
        lda: ::core::ffi::c_int,
        B: *mut ::core::ffi::c_void,
        ldb: ::core::ffi::c_int,
        stream: cuda_types::cublas::cudaStream_t,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSetMatrixAsync_64(
        rows: i64,
        cols: i64,
        elemSize: i64,
        A: *const ::core::ffi::c_void,
        lda: i64,
        B: *mut ::core::ffi::c_void,
        ldb: i64,
        stream: cuda_types::cublas::cudaStream_t,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasGetMatrixAsync(
        rows: ::core::ffi::c_int,
        cols: ::core::ffi::c_int,
        elemSize: ::core::ffi::c_int,
        A: *const ::core::ffi::c_void,
        lda: ::core::ffi::c_int,
        B: *mut ::core::ffi::c_void,
        ldb: ::core::ffi::c_int,
        stream: cuda_types::cublas::cudaStream_t,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasGetMatrixAsync_64(
        rows: i64,
        cols: i64,
        elemSize: i64,
        A: *const ::core::ffi::c_void,
        lda: i64,
        B: *mut ::core::ffi::c_void,
        ldb: i64,
        stream: cuda_types::cublas::cudaStream_t,
    ) -> cuda_types::cublas::cublasStatus_t;
    fn cublasXerbla(srName: *const ::core::ffi::c_char, info: ::core::ffi::c_int) -> ();
    #[must_use]
    fn cublasNrm2Ex(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: ::core::ffi::c_int,
        result: *mut ::core::ffi::c_void,
        resultType: cuda_types::cublas::cudaDataType,
        executionType: cuda_types::cublas::cudaDataType,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasNrm2Ex_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: i64,
        result: *mut ::core::ffi::c_void,
        resultType: cuda_types::cublas::cudaDataType,
        executionType: cuda_types::cublas::cudaDataType,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSnrm2_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const f32,
        incx: ::core::ffi::c_int,
        result: *mut f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSnrm2_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const f32,
        incx: i64,
        result: *mut f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasDnrm2_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const f64,
        incx: ::core::ffi::c_int,
        result: *mut f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasDnrm2_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const f64,
        incx: i64,
        result: *mut f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasScnrm2_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        result: *mut f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasScnrm2_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        result: *mut f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasDznrm2_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        result: *mut f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasDznrm2_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        result: *mut f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasDotEx(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: ::core::ffi::c_int,
        y: *const ::core::ffi::c_void,
        yType: cuda_types::cublas::cudaDataType,
        incy: ::core::ffi::c_int,
        result: *mut ::core::ffi::c_void,
        resultType: cuda_types::cublas::cudaDataType,
        executionType: cuda_types::cublas::cudaDataType,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasDotEx_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: i64,
        y: *const ::core::ffi::c_void,
        yType: cuda_types::cublas::cudaDataType,
        incy: i64,
        result: *mut ::core::ffi::c_void,
        resultType: cuda_types::cublas::cudaDataType,
        executionType: cuda_types::cublas::cudaDataType,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasDotcEx(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: ::core::ffi::c_int,
        y: *const ::core::ffi::c_void,
        yType: cuda_types::cublas::cudaDataType,
        incy: ::core::ffi::c_int,
        result: *mut ::core::ffi::c_void,
        resultType: cuda_types::cublas::cudaDataType,
        executionType: cuda_types::cublas::cudaDataType,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasDotcEx_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: i64,
        y: *const ::core::ffi::c_void,
        yType: cuda_types::cublas::cudaDataType,
        incy: i64,
        result: *mut ::core::ffi::c_void,
        resultType: cuda_types::cublas::cudaDataType,
        executionType: cuda_types::cublas::cudaDataType,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSdot_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const f32,
        incx: ::core::ffi::c_int,
        y: *const f32,
        incy: ::core::ffi::c_int,
        result: *mut f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSdot_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const f32,
        incx: i64,
        y: *const f32,
        incy: i64,
        result: *mut f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasDdot_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const f64,
        incx: ::core::ffi::c_int,
        y: *const f64,
        incy: ::core::ffi::c_int,
        result: *mut f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasDdot_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const f64,
        incx: i64,
        y: *const f64,
        incy: i64,
        result: *mut f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasCdotu_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        y: *const cuda_types::cublas::cuComplex,
        incy: ::core::ffi::c_int,
        result: *mut cuda_types::cublas::cuComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasCdotu_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        y: *const cuda_types::cublas::cuComplex,
        incy: i64,
        result: *mut cuda_types::cublas::cuComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasCdotc_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        y: *const cuda_types::cublas::cuComplex,
        incy: ::core::ffi::c_int,
        result: *mut cuda_types::cublas::cuComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasCdotc_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        y: *const cuda_types::cublas::cuComplex,
        incy: i64,
        result: *mut cuda_types::cublas::cuComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasZdotu_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        y: *const cuda_types::cublas::cuDoubleComplex,
        incy: ::core::ffi::c_int,
        result: *mut cuda_types::cublas::cuDoubleComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasZdotu_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        y: *const cuda_types::cublas::cuDoubleComplex,
        incy: i64,
        result: *mut cuda_types::cublas::cuDoubleComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasZdotc_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        y: *const cuda_types::cublas::cuDoubleComplex,
        incy: ::core::ffi::c_int,
        result: *mut cuda_types::cublas::cuDoubleComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasZdotc_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        y: *const cuda_types::cublas::cuDoubleComplex,
        incy: i64,
        result: *mut cuda_types::cublas::cuDoubleComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasScalEx(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        alpha: *const ::core::ffi::c_void,
        alphaType: cuda_types::cublas::cudaDataType,
        x: *mut ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: ::core::ffi::c_int,
        executionType: cuda_types::cublas::cudaDataType,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasScalEx_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        alpha: *const ::core::ffi::c_void,
        alphaType: cuda_types::cublas::cudaDataType,
        x: *mut ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: i64,
        executionType: cuda_types::cublas::cudaDataType,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSscal_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        alpha: *const f32,
        x: *mut f32,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSscal_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        alpha: *const f32,
        x: *mut f32,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasDscal_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        alpha: *const f64,
        x: *mut f64,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasDscal_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        alpha: *const f64,
        x: *mut f64,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasCscal_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuComplex,
        x: *mut cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasCscal_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        alpha: *const cuda_types::cublas::cuComplex,
        x: *mut cuda_types::cublas::cuComplex,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasCsscal_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        alpha: *const f32,
        x: *mut cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasCsscal_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        alpha: *const f32,
        x: *mut cuda_types::cublas::cuComplex,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasZscal_v2`: `scal` (documented as x = alpha * x) where both
    // `alpha` and `x` are `cuDoubleComplex`. `n` and `incx` are C `int`.
    #[must_use]
    fn cublasZscal_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasZscal_v2_64`: same parameter list as `cublasZscal_v2`, but
    // `n` and `incx` are `i64`.
    #[must_use]
    fn cublasZscal_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasZdscal_v2`: `scal` variant with a real `f64` `alpha` applied
    // to a `cuDoubleComplex` vector `x`. `n` and `incx` are C `int`.
    #[must_use]
    fn cublasZdscal_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        alpha: *const f64,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasZdscal_v2_64`: same parameter list as `cublasZdscal_v2`, but
    // `n` and `incx` are `i64`.
    #[must_use]
    fn cublasZdscal_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        alpha: *const f64,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasAxpyEx`: type-erased `axpy` (documented by cuBLAS as
    // y = alpha * x + y). `alpha`, `x` and `y` are `c_void` pointers, each paired
    // with a `cudaDataType` tag; `executiontype` is a further `cudaDataType`.
    // `n`, `incx` and `incy` are C `int`.
    #[must_use]
    fn cublasAxpyEx(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        alpha: *const ::core::ffi::c_void,
        alphaType: cuda_types::cublas::cudaDataType,
        x: *const ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: ::core::ffi::c_int,
        y: *mut ::core::ffi::c_void,
        yType: cuda_types::cublas::cudaDataType,
        incy: ::core::ffi::c_int,
        executiontype: cuda_types::cublas::cudaDataType,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasAxpyEx_64`: same parameter list as `cublasAxpyEx`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasAxpyEx_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        alpha: *const ::core::ffi::c_void,
        alphaType: cuda_types::cublas::cudaDataType,
        x: *const ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: i64,
        y: *mut ::core::ffi::c_void,
        yType: cuda_types::cublas::cudaDataType,
        incy: i64,
        executiontype: cuda_types::cublas::cudaDataType,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasSaxpy_v2`: f32 `axpy` (documented by cuBLAS as
    // y = alpha * x + y). `x` is read-only, `y` is mutable; `n`, `incx` and
    // `incy` are C `int`.
    #[must_use]
    fn cublasSaxpy_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        alpha: *const f32,
        x: *const f32,
        incx: ::core::ffi::c_int,
        y: *mut f32,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasSaxpy_v2_64`: same parameter list as `cublasSaxpy_v2`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasSaxpy_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        alpha: *const f32,
        x: *const f32,
        incx: i64,
        y: *mut f32,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasDaxpy_v2`: f64 `axpy` (documented by cuBLAS as
    // y = alpha * x + y). `x` is read-only, `y` is mutable; `n`, `incx` and
    // `incy` are C `int`.
    #[must_use]
    fn cublasDaxpy_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        alpha: *const f64,
        x: *const f64,
        incx: ::core::ffi::c_int,
        y: *mut f64,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasDaxpy_v2_64`: same parameter list as `cublasDaxpy_v2`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasDaxpy_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        alpha: *const f64,
        x: *const f64,
        incx: i64,
        y: *mut f64,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasCaxpy_v2`: `axpy` (documented as y = alpha * x + y) with
    // `cuComplex` scalar and vectors. `n`, `incx` and `incy` are C `int`.
    #[must_use]
    fn cublasCaxpy_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuComplex,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        y: *mut cuda_types::cublas::cuComplex,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasCaxpy_v2_64`: same parameter list as `cublasCaxpy_v2`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasCaxpy_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        alpha: *const cuda_types::cublas::cuComplex,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        y: *mut cuda_types::cublas::cuComplex,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasZaxpy_v2`: `axpy` (documented as y = alpha * x + y) with
    // `cuDoubleComplex` scalar and vectors. `n`, `incx` and `incy` are C `int`.
    #[must_use]
    fn cublasZaxpy_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasZaxpy_v2_64`: same parameter list as `cublasZaxpy_v2`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasZaxpy_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasCopyEx`: type-erased vector copy entry point (read-only `x`,
    // mutable `y`), each `c_void` pointer paired with a `cudaDataType` tag.
    // `n`, `incx` and `incy` are C `int`.
    #[must_use]
    fn cublasCopyEx(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: ::core::ffi::c_int,
        y: *mut ::core::ffi::c_void,
        yType: cuda_types::cublas::cudaDataType,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasCopyEx_64`: same parameter list as `cublasCopyEx`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasCopyEx_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: i64,
        y: *mut ::core::ffi::c_void,
        yType: cuda_types::cublas::cudaDataType,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasScopy_v2`: f32 `copy` (documented by cuBLAS as copying
    // vector `x` into `y`). `n`, `incx` and `incy` are C `int`.
    #[must_use]
    fn cublasScopy_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const f32,
        incx: ::core::ffi::c_int,
        y: *mut f32,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasScopy_v2_64`: same parameter list as `cublasScopy_v2`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasScopy_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const f32,
        incx: i64,
        y: *mut f32,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasDcopy_v2`: f64 `copy` (documented by cuBLAS as copying
    // vector `x` into `y`). `n`, `incx` and `incy` are C `int`.
    #[must_use]
    fn cublasDcopy_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const f64,
        incx: ::core::ffi::c_int,
        y: *mut f64,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasDcopy_v2_64`: same parameter list as `cublasDcopy_v2`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasDcopy_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const f64,
        incx: i64,
        y: *mut f64,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasCcopy_v2`: `copy` for `cuComplex` vectors (read-only `x`,
    // mutable `y`). `n`, `incx` and `incy` are C `int`.
    #[must_use]
    fn cublasCcopy_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        y: *mut cuda_types::cublas::cuComplex,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasCcopy_v2_64`: same parameter list as `cublasCcopy_v2`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasCcopy_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        y: *mut cuda_types::cublas::cuComplex,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasZcopy_v2`: `copy` for `cuDoubleComplex` vectors (read-only
    // `x`, mutable `y`). `n`, `incx` and `incy` are C `int`.
    #[must_use]
    fn cublasZcopy_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasZcopy_v2_64`: same parameter list as `cublasZcopy_v2`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasZcopy_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasSswap_v2`: f32 `swap` (documented by cuBLAS as exchanging
    // the elements of `x` and `y`); both vectors are mutable. `n`, `incx` and
    // `incy` are C `int`.
    #[must_use]
    fn cublasSswap_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *mut f32,
        incx: ::core::ffi::c_int,
        y: *mut f32,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasSswap_v2_64`: same parameter list as `cublasSswap_v2`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasSswap_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *mut f32,
        incx: i64,
        y: *mut f32,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasDswap_v2`: f64 `swap` (documented by cuBLAS as exchanging
    // the elements of `x` and `y`); both vectors are mutable. `n`, `incx` and
    // `incy` are C `int`.
    #[must_use]
    fn cublasDswap_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *mut f64,
        incx: ::core::ffi::c_int,
        y: *mut f64,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasDswap_v2_64`: same parameter list as `cublasDswap_v2`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasDswap_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *mut f64,
        incx: i64,
        y: *mut f64,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasCswap_v2`: `swap` for `cuComplex` vectors; both `x` and `y`
    // are mutable. `n`, `incx` and `incy` are C `int`.
    #[must_use]
    fn cublasCswap_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *mut cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        y: *mut cuda_types::cublas::cuComplex,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasCswap_v2_64`: same parameter list as `cublasCswap_v2`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasCswap_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *mut cuda_types::cublas::cuComplex,
        incx: i64,
        y: *mut cuda_types::cublas::cuComplex,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasZswap_v2`: `swap` for `cuDoubleComplex` vectors; both `x`
    // and `y` are mutable. `n`, `incx` and `incy` are C `int`.
    #[must_use]
    fn cublasZswap_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasZswap_v2_64`: same parameter list as `cublasZswap_v2`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasZswap_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasSwapEx`: type-erased `swap` entry point; `x` and `y` are
    // mutable `c_void` pointers, each paired with a `cudaDataType` tag.
    // `n`, `incx` and `incy` are C `int`.
    #[must_use]
    fn cublasSwapEx(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *mut ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: ::core::ffi::c_int,
        y: *mut ::core::ffi::c_void,
        yType: cuda_types::cublas::cudaDataType,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasSwapEx_64`: same parameter list as `cublasSwapEx`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasSwapEx_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *mut ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: i64,
        y: *mut ::core::ffi::c_void,
        yType: cuda_types::cublas::cudaDataType,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasIsamax_v2`: f32 `amax` (documented by cuBLAS as finding the
    // index of the element of maximum magnitude). The index is written through
    // `result` as a C `int`; `n` and `incx` are C `int`.
    #[must_use]
    fn cublasIsamax_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const f32,
        incx: ::core::ffi::c_int,
        result: *mut ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasIsamax_v2_64`: same parameter list as `cublasIsamax_v2`, but
    // `n`, `incx` and the `result` target are `i64`.
    #[must_use]
    fn cublasIsamax_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const f32,
        incx: i64,
        result: *mut i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasIdamax_v2`: f64 `amax` (documented by cuBLAS as finding the
    // index of the element of maximum magnitude). The index is written through
    // `result` as a C `int`; `n` and `incx` are C `int`.
    #[must_use]
    fn cublasIdamax_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const f64,
        incx: ::core::ffi::c_int,
        result: *mut ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasIdamax_v2_64`: same parameter list as `cublasIdamax_v2`, but
    // `n`, `incx` and the `result` target are `i64`.
    #[must_use]
    fn cublasIdamax_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const f64,
        incx: i64,
        result: *mut i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasIcamax_v2`: `amax` over a `cuComplex` vector. The index is
    // written through `result` as a C `int`; `n` and `incx` are C `int`.
    #[must_use]
    fn cublasIcamax_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        result: *mut ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasIcamax_v2_64`: same parameter list as `cublasIcamax_v2`, but
    // `n`, `incx` and the `result` target are `i64`.
    #[must_use]
    fn cublasIcamax_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        result: *mut i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasIzamax_v2`: `amax` over a `cuDoubleComplex` vector. The index
    // is written through `result` as a C `int`; `n` and `incx` are C `int`.
    #[must_use]
    fn cublasIzamax_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        result: *mut ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasIzamax_v2_64`: same parameter list as `cublasIzamax_v2`, but
    // `n`, `incx` and the `result` target are `i64`.
    #[must_use]
    fn cublasIzamax_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        result: *mut i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasIamaxEx`: type-erased `amax` entry point; `x` is a `c_void`
    // pointer whose element type is given by `xType`. The index is written
    // through `result` as a C `int`; `n` and `incx` are C `int`.
    #[must_use]
    fn cublasIamaxEx(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: ::core::ffi::c_int,
        result: *mut ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasIamaxEx_64`: same parameter list as `cublasIamaxEx`, but
    // `n`, `incx` and the `result` target are `i64`.
    #[must_use]
    fn cublasIamaxEx_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: i64,
        result: *mut i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasIsamin_v2`: f32 `amin` (documented by cuBLAS as finding the
    // index of the element of minimum magnitude). The index is written through
    // `result` as a C `int`; `n` and `incx` are C `int`.
    #[must_use]
    fn cublasIsamin_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const f32,
        incx: ::core::ffi::c_int,
        result: *mut ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasIsamin_v2_64`: same parameter list as `cublasIsamin_v2`, but
    // `n`, `incx` and the `result` target are `i64`.
    #[must_use]
    fn cublasIsamin_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const f32,
        incx: i64,
        result: *mut i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasIdamin_v2`: f64 `amin` (documented by cuBLAS as finding the
    // index of the element of minimum magnitude). The index is written through
    // `result` as a C `int`; `n` and `incx` are C `int`.
    #[must_use]
    fn cublasIdamin_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const f64,
        incx: ::core::ffi::c_int,
        result: *mut ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasIdamin_v2_64`: same parameter list as `cublasIdamin_v2`, but
    // `n`, `incx` and the `result` target are `i64`.
    #[must_use]
    fn cublasIdamin_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const f64,
        incx: i64,
        result: *mut i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasIcamin_v2`: `amin` over a `cuComplex` vector. The index is
    // written through `result` as a C `int`; `n` and `incx` are C `int`.
    #[must_use]
    fn cublasIcamin_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        result: *mut ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasIcamin_v2_64`: same parameter list as `cublasIcamin_v2`, but
    // `n`, `incx` and the `result` target are `i64`.
    #[must_use]
    fn cublasIcamin_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        result: *mut i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasIzamin_v2`: `amin` over a `cuDoubleComplex` vector. The index
    // is written through `result` as a C `int`; `n` and `incx` are C `int`.
    #[must_use]
    fn cublasIzamin_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        result: *mut ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasIzamin_v2_64`: same parameter list as `cublasIzamin_v2`, but
    // `n`, `incx` and the `result` target are `i64`.
    #[must_use]
    fn cublasIzamin_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        result: *mut i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasIaminEx`: type-erased `amin` entry point; `x` is a `c_void`
    // pointer whose element type is given by `xType`. The index is written
    // through `result` as a C `int`; `n` and `incx` are C `int`.
    #[must_use]
    fn cublasIaminEx(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: ::core::ffi::c_int,
        result: *mut ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasIaminEx_64`: same parameter list as `cublasIaminEx`, but
    // `n`, `incx` and the `result` target are `i64`.
    #[must_use]
    fn cublasIaminEx_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: i64,
        result: *mut i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasAsumEx`: type-erased `asum` entry point (cuBLAS documents
    // `asum` as a sum of absolute values). `x` and `result` are `c_void`
    // pointers tagged by `xType` / `resultType`; `executiontype` is a further
    // `cudaDataType`. `n` and `incx` are C `int`.
    #[must_use]
    fn cublasAsumEx(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: ::core::ffi::c_int,
        result: *mut ::core::ffi::c_void,
        resultType: cuda_types::cublas::cudaDataType,
        executiontype: cuda_types::cublas::cudaDataType,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasAsumEx_64`: same parameter list as `cublasAsumEx`, but
    // `n` and `incx` are `i64`.
    #[must_use]
    fn cublasAsumEx_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: i64,
        result: *mut ::core::ffi::c_void,
        resultType: cuda_types::cublas::cudaDataType,
        executiontype: cuda_types::cublas::cudaDataType,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasSasum_v2`: f32 `asum` (documented by cuBLAS as the sum of
    // absolute values of `x`), written through `result` as `f32`.
    // `n` and `incx` are C `int`.
    #[must_use]
    fn cublasSasum_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const f32,
        incx: ::core::ffi::c_int,
        result: *mut f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasSasum_v2_64`: same parameter list as `cublasSasum_v2`, but
    // `n` and `incx` are `i64`; `result` stays `f32`.
    #[must_use]
    fn cublasSasum_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const f32,
        incx: i64,
        result: *mut f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasDasum_v2`: f64 `asum` (documented by cuBLAS as the sum of
    // absolute values of `x`), written through `result` as `f64`.
    // `n` and `incx` are C `int`.
    #[must_use]
    fn cublasDasum_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const f64,
        incx: ::core::ffi::c_int,
        result: *mut f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasDasum_v2_64`: same parameter list as `cublasDasum_v2`, but
    // `n` and `incx` are `i64`; `result` stays `f64`.
    #[must_use]
    fn cublasDasum_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const f64,
        incx: i64,
        result: *mut f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasScasum_v2`: `asum` over a `cuComplex` vector with a real
    // `f32` result written through `result`. `n` and `incx` are C `int`.
    #[must_use]
    fn cublasScasum_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        result: *mut f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasScasum_v2_64`: same parameter list as `cublasScasum_v2`, but
    // `n` and `incx` are `i64`; `result` stays `f32`.
    #[must_use]
    fn cublasScasum_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        result: *mut f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasDzasum_v2`: `asum` over a `cuDoubleComplex` vector with a
    // real `f64` result written through `result`. `n` and `incx` are C `int`.
    #[must_use]
    fn cublasDzasum_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        result: *mut f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasDzasum_v2_64`: same parameter list as `cublasDzasum_v2`, but
    // `n` and `incx` are `i64`; `result` stays `f64`.
    #[must_use]
    fn cublasDzasum_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        result: *mut f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasSrot_v2`: f32 `rot` (documented by cuBLAS as applying a
    // Givens rotation with cosine `c` and sine `s` to `x` and `y`). Both vectors
    // are mutable; `c` and `s` are read-only `f32`. `n`, `incx`, `incy` are C `int`.
    #[must_use]
    fn cublasSrot_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *mut f32,
        incx: ::core::ffi::c_int,
        y: *mut f32,
        incy: ::core::ffi::c_int,
        c: *const f32,
        s: *const f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasSrot_v2_64`: same parameter list as `cublasSrot_v2`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasSrot_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *mut f32,
        incx: i64,
        y: *mut f32,
        incy: i64,
        c: *const f32,
        s: *const f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasDrot_v2`: f64 `rot` (documented by cuBLAS as applying a
    // Givens rotation with cosine `c` and sine `s` to `x` and `y`). Both vectors
    // are mutable; `c` and `s` are read-only `f64`. `n`, `incx`, `incy` are C `int`.
    #[must_use]
    fn cublasDrot_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *mut f64,
        incx: ::core::ffi::c_int,
        y: *mut f64,
        incy: ::core::ffi::c_int,
        c: *const f64,
        s: *const f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasDrot_v2_64`: same parameter list as `cublasDrot_v2`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasDrot_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *mut f64,
        incx: i64,
        y: *mut f64,
        incy: i64,
        c: *const f64,
        s: *const f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasCrot_v2`: `rot` on `cuComplex` vectors with a real `f32`
    // `c` and a `cuComplex` `s`. `n`, `incx` and `incy` are C `int`.
    #[must_use]
    fn cublasCrot_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *mut cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        y: *mut cuda_types::cublas::cuComplex,
        incy: ::core::ffi::c_int,
        c: *const f32,
        s: *const cuda_types::cublas::cuComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasCrot_v2_64`: same parameter list as `cublasCrot_v2`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasCrot_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *mut cuda_types::cublas::cuComplex,
        incx: i64,
        y: *mut cuda_types::cublas::cuComplex,
        incy: i64,
        c: *const f32,
        s: *const cuda_types::cublas::cuComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasCsrot_v2`: `rot` on `cuComplex` vectors where both `c` and
    // `s` are real `f32`. `n`, `incx` and `incy` are C `int`.
    #[must_use]
    fn cublasCsrot_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *mut cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        y: *mut cuda_types::cublas::cuComplex,
        incy: ::core::ffi::c_int,
        c: *const f32,
        s: *const f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasCsrot_v2_64`: same parameter list as `cublasCsrot_v2`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasCsrot_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *mut cuda_types::cublas::cuComplex,
        incx: i64,
        y: *mut cuda_types::cublas::cuComplex,
        incy: i64,
        c: *const f32,
        s: *const f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasZrot_v2`: `rot` on `cuDoubleComplex` vectors with a real
    // `f64` `c` and a `cuDoubleComplex` `s`. `n`, `incx` and `incy` are C `int`.
    #[must_use]
    fn cublasZrot_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: ::core::ffi::c_int,
        c: *const f64,
        s: *const cuda_types::cublas::cuDoubleComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasZrot_v2_64`: same parameter list as `cublasZrot_v2`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasZrot_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: i64,
        c: *const f64,
        s: *const cuda_types::cublas::cuDoubleComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasZdrot_v2`: `rot` on `cuDoubleComplex` vectors where both `c`
    // and `s` are real `f64`. `n`, `incx` and `incy` are C `int`.
    #[must_use]
    fn cublasZdrot_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: ::core::ffi::c_int,
        c: *const f64,
        s: *const f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasZdrot_v2_64`: same parameter list as `cublasZdrot_v2`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasZdrot_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: i64,
        c: *const f64,
        s: *const f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasRotEx`: type-erased `rot` entry point. `x` and `y` are
    // mutable `c_void` pointers tagged by `xType` / `yType`; `c` and `s` are
    // read-only `c_void` pointers sharing the single `csType` tag;
    // `executiontype` is a further `cudaDataType`. `n`, `incx`, `incy` are C `int`.
    #[must_use]
    fn cublasRotEx(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *mut ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: ::core::ffi::c_int,
        y: *mut ::core::ffi::c_void,
        yType: cuda_types::cublas::cudaDataType,
        incy: ::core::ffi::c_int,
        c: *const ::core::ffi::c_void,
        s: *const ::core::ffi::c_void,
        csType: cuda_types::cublas::cudaDataType,
        executiontype: cuda_types::cublas::cudaDataType,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasRotEx_64`: same parameter list as `cublasRotEx`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasRotEx_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *mut ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: i64,
        y: *mut ::core::ffi::c_void,
        yType: cuda_types::cublas::cudaDataType,
        incy: i64,
        c: *const ::core::ffi::c_void,
        s: *const ::core::ffi::c_void,
        csType: cuda_types::cublas::cudaDataType,
        executiontype: cuda_types::cublas::cudaDataType,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasSrotg_v2`: f32 `rotg` (documented by cuBLAS as constructing
    // a Givens rotation). All four scalars `a`, `b`, `c`, `s` are passed as
    // mutable `f32` pointers; there is no length or stride argument.
    #[must_use]
    fn cublasSrotg_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        a: *mut f32,
        b: *mut f32,
        c: *mut f32,
        s: *mut f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasDrotg_v2`: f64 `rotg` (documented by cuBLAS as constructing
    // a Givens rotation). All four scalars `a`, `b`, `c`, `s` are passed as
    // mutable `f64` pointers.
    #[must_use]
    fn cublasDrotg_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        a: *mut f64,
        b: *mut f64,
        c: *mut f64,
        s: *mut f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasCrotg_v2`: `rotg` with `cuComplex` `a`, `b` and `s` and a
    // real `f32` `c`; all four are mutable pointers.
    #[must_use]
    fn cublasCrotg_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        a: *mut cuda_types::cublas::cuComplex,
        b: *mut cuda_types::cublas::cuComplex,
        c: *mut f32,
        s: *mut cuda_types::cublas::cuComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasZrotg_v2`: `rotg` with `cuDoubleComplex` `a`, `b` and `s`
    // and a real `f64` `c`; all four are mutable pointers.
    #[must_use]
    fn cublasZrotg_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        a: *mut cuda_types::cublas::cuDoubleComplex,
        b: *mut cuda_types::cublas::cuDoubleComplex,
        c: *mut f64,
        s: *mut cuda_types::cublas::cuDoubleComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasRotgEx`: type-erased `rotg` entry point. `a` and `b` share
    // the `abType` tag, `c` and `s` share the `csType` tag; all four are mutable
    // `c_void` pointers. `executiontype` is a further `cudaDataType`.
    #[must_use]
    fn cublasRotgEx(
        handle: cuda_types::cublas::cublasHandle_t,
        a: *mut ::core::ffi::c_void,
        b: *mut ::core::ffi::c_void,
        abType: cuda_types::cublas::cudaDataType,
        c: *mut ::core::ffi::c_void,
        s: *mut ::core::ffi::c_void,
        csType: cuda_types::cublas::cudaDataType,
        executiontype: cuda_types::cublas::cudaDataType,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasSrotm_v2`: f32 `rotm` (documented by cuBLAS as applying a
    // modified Givens transformation described by `param`). `x` and `y` are
    // mutable, `param` is read-only. `n`, `incx` and `incy` are C `int`.
    #[must_use]
    fn cublasSrotm_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *mut f32,
        incx: ::core::ffi::c_int,
        y: *mut f32,
        incy: ::core::ffi::c_int,
        param: *const f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasSrotm_v2_64`: same parameter list as `cublasSrotm_v2`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasSrotm_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *mut f32,
        incx: i64,
        y: *mut f32,
        incy: i64,
        param: *const f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasDrotm_v2`: f64 `rotm` (documented by cuBLAS as applying a
    // modified Givens transformation described by `param`). `x` and `y` are
    // mutable, `param` is read-only. `n`, `incx` and `incy` are C `int`.
    #[must_use]
    fn cublasDrotm_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *mut f64,
        incx: ::core::ffi::c_int,
        y: *mut f64,
        incy: ::core::ffi::c_int,
        param: *const f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasDrotm_v2_64`: same parameter list as `cublasDrotm_v2`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasDrotm_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *mut f64,
        incx: i64,
        y: *mut f64,
        incy: i64,
        param: *const f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasRotmEx`: type-erased `rotm` entry point. `x`, `y` (mutable)
    // and `param` (read-only) are `c_void` pointers, each paired with a
    // `cudaDataType` tag; `executiontype` is a further `cudaDataType`.
    // `n`, `incx` and `incy` are C `int`.
    #[must_use]
    fn cublasRotmEx(
        handle: cuda_types::cublas::cublasHandle_t,
        n: ::core::ffi::c_int,
        x: *mut ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: ::core::ffi::c_int,
        y: *mut ::core::ffi::c_void,
        yType: cuda_types::cublas::cudaDataType,
        incy: ::core::ffi::c_int,
        param: *const ::core::ffi::c_void,
        paramType: cuda_types::cublas::cudaDataType,
        executiontype: cuda_types::cublas::cudaDataType,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasRotmEx_64`: same parameter list as `cublasRotmEx`, but
    // `n`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasRotmEx_64(
        handle: cuda_types::cublas::cublasHandle_t,
        n: i64,
        x: *mut ::core::ffi::c_void,
        xType: cuda_types::cublas::cudaDataType,
        incx: i64,
        y: *mut ::core::ffi::c_void,
        yType: cuda_types::cublas::cudaDataType,
        incy: i64,
        param: *const ::core::ffi::c_void,
        paramType: cuda_types::cublas::cudaDataType,
        executiontype: cuda_types::cublas::cudaDataType,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasSrotmg_v2`: f32 `rotmg` (documented by cuBLAS as
    // constructing a modified Givens transformation). `d1`, `d2`, `x1` and
    // `param` are mutable; `y1` is the only read-only scalar.
    #[must_use]
    fn cublasSrotmg_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        d1: *mut f32,
        d2: *mut f32,
        x1: *mut f32,
        y1: *const f32,
        param: *mut f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasDrotmg_v2`: f64 `rotmg` (documented by cuBLAS as
    // constructing a modified Givens transformation). `d1`, `d2`, `x1` and
    // `param` are mutable; `y1` is the only read-only scalar.
    #[must_use]
    fn cublasDrotmg_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        d1: *mut f64,
        d2: *mut f64,
        x1: *mut f64,
        y1: *const f64,
        param: *mut f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasRotmgEx`: type-erased `rotmg` entry point. Each of `d1`,
    // `d2`, `x1`, `y1` and `param` is a `c_void` pointer followed by its own
    // `cudaDataType` tag; only `y1` is read-only. `executiontype` is a further
    // `cudaDataType`.
    #[must_use]
    fn cublasRotmgEx(
        handle: cuda_types::cublas::cublasHandle_t,
        d1: *mut ::core::ffi::c_void,
        d1Type: cuda_types::cublas::cudaDataType,
        d2: *mut ::core::ffi::c_void,
        d2Type: cuda_types::cublas::cudaDataType,
        x1: *mut ::core::ffi::c_void,
        x1Type: cuda_types::cublas::cudaDataType,
        y1: *const ::core::ffi::c_void,
        y1Type: cuda_types::cublas::cudaDataType,
        param: *mut ::core::ffi::c_void,
        paramType: cuda_types::cublas::cudaDataType,
        executiontype: cuda_types::cublas::cudaDataType,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasSgemv_v2`: f32 `gemv` (documented by cuBLAS as
    // y = alpha * op(A) * x + beta * y, with `op` selected by `trans`).
    // `A` and `x` are read-only, `y` is mutable; `m`, `n`, `lda`, `incx` and
    // `incy` are C `int`.
    #[must_use]
    fn cublasSgemv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        alpha: *const f32,
        A: *const f32,
        lda: ::core::ffi::c_int,
        x: *const f32,
        incx: ::core::ffi::c_int,
        beta: *const f32,
        y: *mut f32,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasSgemv_v2_64`: same parameter list as `cublasSgemv_v2`, but
    // `m`, `n`, `lda`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasSgemv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: i64,
        n: i64,
        alpha: *const f32,
        A: *const f32,
        lda: i64,
        x: *const f32,
        incx: i64,
        beta: *const f32,
        y: *mut f32,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasDgemv_v2`: f64 `gemv` (documented by cuBLAS as
    // y = alpha * op(A) * x + beta * y, with `op` selected by `trans`).
    // `A` and `x` are read-only, `y` is mutable; `m`, `n`, `lda`, `incx` and
    // `incy` are C `int`.
    #[must_use]
    fn cublasDgemv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        alpha: *const f64,
        A: *const f64,
        lda: ::core::ffi::c_int,
        x: *const f64,
        incx: ::core::ffi::c_int,
        beta: *const f64,
        y: *mut f64,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasDgemv_v2_64`: same parameter list as `cublasDgemv_v2`, but
    // `m`, `n`, `lda`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasDgemv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: i64,
        n: i64,
        alpha: *const f64,
        A: *const f64,
        lda: i64,
        x: *const f64,
        incx: i64,
        beta: *const f64,
        y: *mut f64,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasCgemv_v2`: `gemv` (documented as
    // y = alpha * op(A) * x + beta * y) with `cuComplex` scalars, matrix and
    // vectors. `m`, `n`, `lda`, `incx` and `incy` are C `int`.
    #[must_use]
    fn cublasCgemv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuComplex,
        A: *const cuda_types::cublas::cuComplex,
        lda: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        beta: *const cuda_types::cublas::cuComplex,
        y: *mut cuda_types::cublas::cuComplex,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasCgemv_v2_64`: same parameter list as `cublasCgemv_v2`, but
    // `m`, `n`, `lda`, `incx` and `incy` are `i64`.
    #[must_use]
    fn cublasCgemv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: i64,
        n: i64,
        alpha: *const cuda_types::cublas::cuComplex,
        A: *const cuda_types::cublas::cuComplex,
        lda: i64,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        beta: *const cuda_types::cublas::cuComplex,
        y: *mut cuda_types::cublas::cuComplex,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // cuBLAS `cublasZgemv_v2`: `gemv` (documented as
    // y = alpha * op(A) * x + beta * y) with `cuDoubleComplex` scalars, matrix
    // and vectors. `m`, `n`, `lda`, `incx` and `incy` are C `int`.
    #[must_use]
    fn cublasZgemv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        A: *const cuda_types::cublas::cuDoubleComplex,
        lda: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        beta: *const cuda_types::cublas::cuDoubleComplex,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasZgemv_v2` in which `m`, `n`, `lda`, `incx` and `incy`
    // are `i64` instead of `c_int`; pointer types (`cuDoubleComplex`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index double-complex GEMV — confirm.
    #[must_use]
    fn cublasZgemv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: i64,
        n: i64,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        A: *const cuda_types::cublas::cuDoubleComplex,
        lda: i64,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        beta: *const cuda_types::cublas::cuDoubleComplex,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: `f32` scalars/operands (`y` is the only mutable
    // pointer), `c_int` for `m`, `n`, `kl`, `ku`, `lda`, `incx`, `incy`; returns a
    // `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably GBMV (banded matrix-vector product, with `kl`/`ku`
    // as the sub-/super-diagonal counts) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasSgbmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        kl: ::core::ffi::c_int,
        ku: ::core::ffi::c_int,
        alpha: *const f32,
        A: *const f32,
        lda: ::core::ffi::c_int,
        x: *const f32,
        incx: ::core::ffi::c_int,
        beta: *const f32,
        y: *mut f32,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasSgbmv_v2` in which `m`, `n`, `kl`, `ku`, `lda`, `incx`
    // and `incy` are `i64` instead of `c_int`; pointer types (`f32`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index GBMV entry point — confirm.
    #[must_use]
    fn cublasSgbmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: i64,
        n: i64,
        kl: i64,
        ku: i64,
        alpha: *const f32,
        A: *const f32,
        lda: i64,
        x: *const f32,
        incx: i64,
        beta: *const f32,
        y: *mut f32,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: `f64` scalars/operands (`y` is the only mutable
    // pointer), `c_int` for `m`, `n`, `kl`, `ku`, `lda`, `incx`, `incy`; returns a
    // `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably double-precision GBMV (banded matrix-vector
    // product) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasDgbmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        kl: ::core::ffi::c_int,
        ku: ::core::ffi::c_int,
        alpha: *const f64,
        A: *const f64,
        lda: ::core::ffi::c_int,
        x: *const f64,
        incx: ::core::ffi::c_int,
        beta: *const f64,
        y: *mut f64,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasDgbmv_v2` in which `m`, `n`, `kl`, `ku`, `lda`, `incx`
    // and `incy` are `i64` instead of `c_int`; pointer types (`f64`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index GBMV entry point — confirm.
    #[must_use]
    fn cublasDgbmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: i64,
        n: i64,
        kl: i64,
        ku: i64,
        alpha: *const f64,
        A: *const f64,
        lda: i64,
        x: *const f64,
        incx: i64,
        beta: *const f64,
        y: *mut f64,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: `cuComplex` scalars/operands (`y` is the only
    // mutable pointer), `c_int` for `m`, `n`, `kl`, `ku`, `lda`, `incx`, `incy`;
    // returns a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably single-precision complex GBMV (banded
    // matrix-vector product) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasCgbmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        kl: ::core::ffi::c_int,
        ku: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuComplex,
        A: *const cuda_types::cublas::cuComplex,
        lda: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        beta: *const cuda_types::cublas::cuComplex,
        y: *mut cuda_types::cublas::cuComplex,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasCgbmv_v2` in which `m`, `n`, `kl`, `ku`, `lda`, `incx`
    // and `incy` are `i64` instead of `c_int`; pointer types (`cuComplex`) are the
    // same. The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index complex GBMV — confirm.
    #[must_use]
    fn cublasCgbmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: i64,
        n: i64,
        kl: i64,
        ku: i64,
        alpha: *const cuda_types::cublas::cuComplex,
        A: *const cuda_types::cublas::cuComplex,
        lda: i64,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        beta: *const cuda_types::cublas::cuComplex,
        y: *mut cuda_types::cublas::cuComplex,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: `cuDoubleComplex` scalars/operands (`y` is the
    // only mutable pointer), `c_int` for `m`, `n`, `kl`, `ku`, `lda`, `incx`,
    // `incy`; returns a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably double-precision complex GBMV (banded
    // matrix-vector product) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasZgbmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        kl: ::core::ffi::c_int,
        ku: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        A: *const cuda_types::cublas::cuDoubleComplex,
        lda: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        beta: *const cuda_types::cublas::cuDoubleComplex,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasZgbmv_v2` in which `m`, `n`, `kl`, `ku`, `lda`, `incx`
    // and `incy` are `i64` instead of `c_int`; pointer types (`cuDoubleComplex`)
    // are the same. The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index double-complex GBMV — confirm.
    #[must_use]
    fn cublasZgbmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: i64,
        n: i64,
        kl: i64,
        ku: i64,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        A: *const cuda_types::cublas::cuDoubleComplex,
        lda: i64,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        beta: *const cuda_types::cublas::cuDoubleComplex,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `f32` matrix `A`, mutable `f32` vector
    // `x` (the only `*mut` argument), `c_int` for `n`, `lda`, `incx`; returns a
    // `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably TRMV (triangular matrix-vector product, result
    // written back into `x`) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasStrmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        A: *const f32,
        lda: ::core::ffi::c_int,
        x: *mut f32,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasStrmv_v2` in which `n`, `lda` and `incx` are `i64`
    // instead of `c_int`; pointer types (`f32`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index TRMV entry point — confirm.
    #[must_use]
    fn cublasStrmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        A: *const f32,
        lda: i64,
        x: *mut f32,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `f64` matrix `A`, mutable `f64` vector
    // `x` (the only `*mut` argument), `c_int` for `n`, `lda`, `incx`; returns a
    // `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably double-precision TRMV (triangular matrix-vector
    // product) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasDtrmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        A: *const f64,
        lda: ::core::ffi::c_int,
        x: *mut f64,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasDtrmv_v2` in which `n`, `lda` and `incx` are `i64`
    // instead of `c_int`; pointer types (`f64`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index TRMV entry point — confirm.
    #[must_use]
    fn cublasDtrmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        A: *const f64,
        lda: i64,
        x: *mut f64,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `cuComplex` matrix `A`, mutable
    // `cuComplex` vector `x` (the only `*mut` argument), `c_int` for `n`, `lda`,
    // `incx`; returns a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably single-precision complex TRMV (triangular
    // matrix-vector product) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasCtrmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        A: *const cuda_types::cublas::cuComplex,
        lda: ::core::ffi::c_int,
        x: *mut cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasCtrmv_v2` in which `n`, `lda` and `incx` are `i64`
    // instead of `c_int`; pointer types (`cuComplex`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index complex TRMV — confirm.
    #[must_use]
    fn cublasCtrmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        A: *const cuda_types::cublas::cuComplex,
        lda: i64,
        x: *mut cuda_types::cublas::cuComplex,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `cuDoubleComplex` matrix `A`, mutable
    // `cuDoubleComplex` vector `x` (the only `*mut` argument), `c_int` for `n`,
    // `lda`, `incx`; returns a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably double-precision complex TRMV (triangular
    // matrix-vector product) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasZtrmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        A: *const cuda_types::cublas::cuDoubleComplex,
        lda: ::core::ffi::c_int,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasZtrmv_v2` in which `n`, `lda` and `incx` are `i64`
    // instead of `c_int`; pointer types (`cuDoubleComplex`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index double-complex TRMV — confirm.
    #[must_use]
    fn cublasZtrmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        A: *const cuda_types::cublas::cuDoubleComplex,
        lda: i64,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `f32` matrix `A`, mutable `f32` vector
    // `x` (the only `*mut` argument), `c_int` for `n`, `k`, `lda`, `incx`; returns
    // a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably TBMV (triangular banded matrix-vector product,
    // `k` being the band width) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasStbmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        k: ::core::ffi::c_int,
        A: *const f32,
        lda: ::core::ffi::c_int,
        x: *mut f32,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasStbmv_v2` in which `n`, `k`, `lda` and `incx` are `i64`
    // instead of `c_int`; pointer types (`f32`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index TBMV entry point — confirm.
    #[must_use]
    fn cublasStbmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        k: i64,
        A: *const f32,
        lda: i64,
        x: *mut f32,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `f64` matrix `A`, mutable `f64` vector
    // `x` (the only `*mut` argument), `c_int` for `n`, `k`, `lda`, `incx`; returns
    // a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably double-precision TBMV (triangular banded
    // matrix-vector product) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasDtbmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        k: ::core::ffi::c_int,
        A: *const f64,
        lda: ::core::ffi::c_int,
        x: *mut f64,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasDtbmv_v2` in which `n`, `k`, `lda` and `incx` are `i64`
    // instead of `c_int`; pointer types (`f64`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index TBMV entry point — confirm.
    #[must_use]
    fn cublasDtbmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        k: i64,
        A: *const f64,
        lda: i64,
        x: *mut f64,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `cuComplex` matrix `A`, mutable
    // `cuComplex` vector `x` (the only `*mut` argument), `c_int` for `n`, `k`,
    // `lda`, `incx`; returns a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably single-precision complex TBMV (triangular banded
    // matrix-vector product) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasCtbmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        k: ::core::ffi::c_int,
        A: *const cuda_types::cublas::cuComplex,
        lda: ::core::ffi::c_int,
        x: *mut cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasCtbmv_v2` in which `n`, `k`, `lda` and `incx` are `i64`
    // instead of `c_int`; pointer types (`cuComplex`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index complex TBMV — confirm.
    #[must_use]
    fn cublasCtbmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        k: i64,
        A: *const cuda_types::cublas::cuComplex,
        lda: i64,
        x: *mut cuda_types::cublas::cuComplex,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `cuDoubleComplex` matrix `A`, mutable
    // `cuDoubleComplex` vector `x` (the only `*mut` argument), `c_int` for `n`,
    // `k`, `lda`, `incx`; returns a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably double-precision complex TBMV (triangular banded
    // matrix-vector product) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasZtbmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        k: ::core::ffi::c_int,
        A: *const cuda_types::cublas::cuDoubleComplex,
        lda: ::core::ffi::c_int,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasZtbmv_v2` in which `n`, `k`, `lda` and `incx` are `i64`
    // instead of `c_int`; pointer types (`cuDoubleComplex`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index double-complex TBMV — confirm.
    #[must_use]
    fn cublasZtbmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        k: i64,
        A: *const cuda_types::cublas::cuDoubleComplex,
        lda: i64,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `f32` array `AP` (no leading-dimension
    // parameter), mutable `f32` vector `x`, `c_int` for `n` and `incx`; returns a
    // `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably TPMV (triangular matrix-vector product with the
    // matrix in packed storage) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasStpmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        AP: *const f32,
        x: *mut f32,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasStpmv_v2` in which `n` and `incx` are `i64` instead of
    // `c_int`; pointer types (`f32`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index TPMV entry point — confirm.
    #[must_use]
    fn cublasStpmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        AP: *const f32,
        x: *mut f32,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `f64` array `AP` (no leading-dimension
    // parameter), mutable `f64` vector `x`, `c_int` for `n` and `incx`; returns a
    // `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably double-precision TPMV (packed triangular
    // matrix-vector product) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasDtpmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        AP: *const f64,
        x: *mut f64,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasDtpmv_v2` in which `n` and `incx` are `i64` instead of
    // `c_int`; pointer types (`f64`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index TPMV entry point — confirm.
    #[must_use]
    fn cublasDtpmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        AP: *const f64,
        x: *mut f64,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `cuComplex` array `AP` (no
    // leading-dimension parameter), mutable `cuComplex` vector `x`, `c_int` for
    // `n` and `incx`; returns a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably single-precision complex TPMV (packed triangular
    // matrix-vector product) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasCtpmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        AP: *const cuda_types::cublas::cuComplex,
        x: *mut cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasCtpmv_v2` in which `n` and `incx` are `i64` instead of
    // `c_int`; pointer types (`cuComplex`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index complex TPMV — confirm.
    #[must_use]
    fn cublasCtpmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        AP: *const cuda_types::cublas::cuComplex,
        x: *mut cuda_types::cublas::cuComplex,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `cuDoubleComplex` array `AP` (no
    // leading-dimension parameter), mutable `cuDoubleComplex` vector `x`, `c_int`
    // for `n` and `incx`; returns a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably double-precision complex TPMV (packed triangular
    // matrix-vector product) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasZtpmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        AP: *const cuda_types::cublas::cuDoubleComplex,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasZtpmv_v2` in which `n` and `incx` are `i64` instead of
    // `c_int`; pointer types (`cuDoubleComplex`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index double-complex TPMV — confirm.
    #[must_use]
    fn cublasZtpmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        AP: *const cuda_types::cublas::cuDoubleComplex,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `f32` matrix `A`, mutable `f32` vector
    // `x` (the only `*mut` argument), `c_int` for `n`, `lda`, `incx`; returns a
    // `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably TRSV (triangular solve, with `x` holding the
    // right-hand side on entry and the solution on exit) — confirm against the
    // cuBLAS reference.
    #[must_use]
    fn cublasStrsv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        A: *const f32,
        lda: ::core::ffi::c_int,
        x: *mut f32,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasStrsv_v2` in which `n`, `lda` and `incx` are `i64`
    // instead of `c_int`; pointer types (`f32`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index TRSV entry point — confirm.
    #[must_use]
    fn cublasStrsv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        A: *const f32,
        lda: i64,
        x: *mut f32,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `f64` matrix `A`, mutable `f64` vector
    // `x` (the only `*mut` argument), `c_int` for `n`, `lda`, `incx`; returns a
    // `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably double-precision TRSV (triangular solve) — confirm
    // against the cuBLAS reference.
    #[must_use]
    fn cublasDtrsv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        A: *const f64,
        lda: ::core::ffi::c_int,
        x: *mut f64,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasDtrsv_v2` in which `n`, `lda` and `incx` are `i64`
    // instead of `c_int`; pointer types (`f64`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index TRSV entry point — confirm.
    #[must_use]
    fn cublasDtrsv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        A: *const f64,
        lda: i64,
        x: *mut f64,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `cuComplex` matrix `A`, mutable
    // `cuComplex` vector `x` (the only `*mut` argument), `c_int` for `n`, `lda`,
    // `incx`; returns a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably single-precision complex TRSV (triangular solve)
    // — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasCtrsv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        A: *const cuda_types::cublas::cuComplex,
        lda: ::core::ffi::c_int,
        x: *mut cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasCtrsv_v2` in which `n`, `lda` and `incx` are `i64`
    // instead of `c_int`; pointer types (`cuComplex`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index complex TRSV — confirm.
    #[must_use]
    fn cublasCtrsv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        A: *const cuda_types::cublas::cuComplex,
        lda: i64,
        x: *mut cuda_types::cublas::cuComplex,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `cuDoubleComplex` matrix `A`, mutable
    // `cuDoubleComplex` vector `x` (the only `*mut` argument), `c_int` for `n`,
    // `lda`, `incx`; returns a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably double-precision complex TRSV (triangular solve)
    // — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasZtrsv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        A: *const cuda_types::cublas::cuDoubleComplex,
        lda: ::core::ffi::c_int,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasZtrsv_v2` in which `n`, `lda` and `incx` are `i64`
    // instead of `c_int`; pointer types (`cuDoubleComplex`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index double-complex TRSV — confirm.
    #[must_use]
    fn cublasZtrsv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        A: *const cuda_types::cublas::cuDoubleComplex,
        lda: i64,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `f32` array `AP` (no leading-dimension
    // parameter), mutable `f32` vector `x`, `c_int` for `n` and `incx`; returns a
    // `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably TPSV (triangular solve with the matrix in packed
    // storage) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasStpsv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        AP: *const f32,
        x: *mut f32,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasStpsv_v2` in which `n` and `incx` are `i64` instead of
    // `c_int`; pointer types (`f32`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index TPSV entry point — confirm.
    #[must_use]
    fn cublasStpsv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        AP: *const f32,
        x: *mut f32,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `f64` array `AP` (no leading-dimension
    // parameter), mutable `f64` vector `x`, `c_int` for `n` and `incx`; returns a
    // `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably double-precision TPSV (packed triangular solve) —
    // confirm against the cuBLAS reference.
    #[must_use]
    fn cublasDtpsv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        AP: *const f64,
        x: *mut f64,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasDtpsv_v2` in which `n` and `incx` are `i64` instead of
    // `c_int`; pointer types (`f64`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index TPSV entry point — confirm.
    #[must_use]
    fn cublasDtpsv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        AP: *const f64,
        x: *mut f64,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `cuComplex` array `AP` (no
    // leading-dimension parameter), mutable `cuComplex` vector `x`, `c_int` for
    // `n` and `incx`; returns a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably single-precision complex TPSV (packed triangular
    // solve) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasCtpsv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        AP: *const cuda_types::cublas::cuComplex,
        x: *mut cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasCtpsv_v2` in which `n` and `incx` are `i64` instead of
    // `c_int`; pointer types (`cuComplex`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index complex TPSV — confirm.
    #[must_use]
    fn cublasCtpsv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        AP: *const cuda_types::cublas::cuComplex,
        x: *mut cuda_types::cublas::cuComplex,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `cuDoubleComplex` array `AP` (no
    // leading-dimension parameter), mutable `cuDoubleComplex` vector `x`, `c_int`
    // for `n` and `incx`; returns a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably double-precision complex TPSV (packed triangular
    // solve) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasZtpsv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        AP: *const cuda_types::cublas::cuDoubleComplex,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasZtpsv_v2` in which `n` and `incx` are `i64` instead of
    // `c_int`; pointer types (`cuDoubleComplex`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index double-complex TPSV — confirm.
    #[must_use]
    fn cublasZtpsv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        AP: *const cuda_types::cublas::cuDoubleComplex,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `f32` matrix `A`, mutable `f32` vector
    // `x` (the only `*mut` argument), `c_int` for `n`, `k`, `lda`, `incx`; returns
    // a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably TBSV (triangular banded solve, `k` being the band
    // width) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasStbsv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        k: ::core::ffi::c_int,
        A: *const f32,
        lda: ::core::ffi::c_int,
        x: *mut f32,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasStbsv_v2` in which `n`, `k`, `lda` and `incx` are `i64`
    // instead of `c_int`; pointer types (`f32`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index TBSV entry point — confirm.
    #[must_use]
    fn cublasStbsv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        k: i64,
        A: *const f32,
        lda: i64,
        x: *mut f32,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `f64` matrix `A`, mutable `f64` vector
    // `x` (the only `*mut` argument), `c_int` for `n`, `k`, `lda`, `incx`; returns
    // a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably double-precision TBSV (triangular banded solve) —
    // confirm against the cuBLAS reference.
    #[must_use]
    fn cublasDtbsv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        k: ::core::ffi::c_int,
        A: *const f64,
        lda: ::core::ffi::c_int,
        x: *mut f64,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasDtbsv_v2` in which `n`, `k`, `lda` and `incx` are `i64`
    // instead of `c_int`; pointer types (`f64`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index TBSV entry point — confirm.
    #[must_use]
    fn cublasDtbsv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        k: i64,
        A: *const f64,
        lda: i64,
        x: *mut f64,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `cuComplex` matrix `A`, mutable
    // `cuComplex` vector `x` (the only `*mut` argument), `c_int` for `n`, `k`,
    // `lda`, `incx`; returns a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably single-precision complex TBSV (triangular banded
    // solve) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasCtbsv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        k: ::core::ffi::c_int,
        A: *const cuda_types::cublas::cuComplex,
        lda: ::core::ffi::c_int,
        x: *mut cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasCtbsv_v2` in which `n`, `k`, `lda` and `incx` are `i64`
    // instead of `c_int`; pointer types (`cuComplex`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index complex TBSV — confirm.
    #[must_use]
    fn cublasCtbsv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        k: i64,
        A: *const cuda_types::cublas::cuComplex,
        lda: i64,
        x: *mut cuda_types::cublas::cuComplex,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: read-only `cuDoubleComplex` matrix `A`, mutable
    // `cuDoubleComplex` vector `x` (the only `*mut` argument), `c_int` for `n`,
    // `k`, `lda`, `incx`; returns a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably double-precision complex TBSV (triangular banded
    // solve) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasZtbsv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: ::core::ffi::c_int,
        k: ::core::ffi::c_int,
        A: *const cuda_types::cublas::cuDoubleComplex,
        lda: ::core::ffi::c_int,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasZtbsv_v2` in which `n`, `k`, `lda` and `incx` are `i64`
    // instead of `c_int`; pointer types (`cuDoubleComplex`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index double-complex TBSV — confirm.
    #[must_use]
    fn cublasZtbsv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        trans: cuda_types::cublas::cublasOperation_t,
        diag: cuda_types::cublas::cublasDiagType_t,
        n: i64,
        k: i64,
        A: *const cuda_types::cublas::cuDoubleComplex,
        lda: i64,
        x: *mut cuda_types::cublas::cuDoubleComplex,
        incx: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: `f32` scalars/operands (`y` is the only mutable
    // pointer), `c_int` for `n`, `lda`, `incx`, `incy`; takes a fill mode (`uplo`)
    // but no `trans`; returns a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably SYMV (symmetric matrix-vector product) — confirm
    // against the cuBLAS reference.
    #[must_use]
    fn cublasSsymv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const f32,
        A: *const f32,
        lda: ::core::ffi::c_int,
        x: *const f32,
        incx: ::core::ffi::c_int,
        beta: *const f32,
        y: *mut f32,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasSsymv_v2` in which `n`, `lda`, `incx` and `incy` are
    // `i64` instead of `c_int`; pointer types (`f32`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index SYMV entry point — confirm.
    #[must_use]
    fn cublasSsymv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const f32,
        A: *const f32,
        lda: i64,
        x: *const f32,
        incx: i64,
        beta: *const f32,
        y: *mut f32,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: `f64` scalars/operands (`y` is the only mutable
    // pointer), `c_int` for `n`, `lda`, `incx`, `incy`; takes a fill mode (`uplo`)
    // but no `trans`; returns a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably double-precision SYMV (symmetric matrix-vector
    // product) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasDsymv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const f64,
        A: *const f64,
        lda: ::core::ffi::c_int,
        x: *const f64,
        incx: ::core::ffi::c_int,
        beta: *const f64,
        y: *mut f64,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasDsymv_v2` in which `n`, `lda`, `incx` and `incy` are
    // `i64` instead of `c_int`; pointer types (`f64`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index SYMV entry point — confirm.
    #[must_use]
    fn cublasDsymv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const f64,
        A: *const f64,
        lda: i64,
        x: *const f64,
        incx: i64,
        beta: *const f64,
        y: *mut f64,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: `cuComplex` scalars/operands (`y` is the only
    // mutable pointer), `c_int` for `n`, `lda`, `incx`, `incy`; takes a fill mode
    // (`uplo`) but no `trans`; returns a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably single-precision complex SYMV (symmetric, not
    // Hermitian, matrix-vector product) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasCsymv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuComplex,
        A: *const cuda_types::cublas::cuComplex,
        lda: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        beta: *const cuda_types::cublas::cuComplex,
        y: *mut cuda_types::cublas::cuComplex,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasCsymv_v2` in which `n`, `lda`, `incx` and `incy` are
    // `i64` instead of `c_int`; pointer types (`cuComplex`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index complex SYMV — confirm.
    #[must_use]
    fn cublasCsymv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const cuda_types::cublas::cuComplex,
        A: *const cuda_types::cublas::cuComplex,
        lda: i64,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        beta: *const cuda_types::cublas::cuComplex,
        y: *mut cuda_types::cublas::cuComplex,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: `cuDoubleComplex` scalars/operands (`y` is the
    // only mutable pointer), `c_int` for `n`, `lda`, `incx`, `incy`; takes a fill
    // mode (`uplo`) but no `trans`; returns a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably double-precision complex SYMV (symmetric
    // matrix-vector product) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasZsymv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        A: *const cuda_types::cublas::cuDoubleComplex,
        lda: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        beta: *const cuda_types::cublas::cuDoubleComplex,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasZsymv_v2` in which `n`, `lda`, `incx` and `incy` are
    // `i64` instead of `c_int`; pointer types (`cuDoubleComplex`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index double-complex SYMV — confirm.
    #[must_use]
    fn cublasZsymv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        A: *const cuda_types::cublas::cuDoubleComplex,
        lda: i64,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        beta: *const cuda_types::cublas::cuDoubleComplex,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: `cuComplex` scalars/operands (`y` is the only
    // mutable pointer), `c_int` for `n`, `lda`, `incx`, `incy`; takes a fill mode
    // (`uplo`) but no `trans`; returns a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably single-precision complex HEMV (Hermitian
    // matrix-vector product) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasChemv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuComplex,
        A: *const cuda_types::cublas::cuComplex,
        lda: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        beta: *const cuda_types::cublas::cuComplex,
        y: *mut cuda_types::cublas::cuComplex,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Counterpart of `cublasChemv_v2` in which `n`, `lda`, `incx` and `incy` are
    // `i64` instead of `c_int`; pointer types (`cuComplex`) are the same.
    // The returned `cublasStatus_t` is `#[must_use]`.
    // NOTE(review): presumably the 64-bit-index complex HEMV — confirm.
    #[must_use]
    fn cublasChemv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const cuda_types::cublas::cuComplex,
        A: *const cuda_types::cublas::cuComplex,
        lda: i64,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        beta: *const cuda_types::cublas::cuComplex,
        y: *mut cuda_types::cublas::cuComplex,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration: `cuDoubleComplex` scalars/operands (`y` is the
    // only mutable pointer), `c_int` for `n`, `lda`, `incx`, `incy`; takes a fill
    // mode (`uplo`) but no `trans`; returns a `#[must_use]` `cublasStatus_t`.
    // NOTE(review): presumably double-precision complex HEMV (Hermitian
    // matrix-vector product) — confirm against the cuBLAS reference.
    #[must_use]
    fn cublasZhemv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        A: *const cuda_types::cublas::cuDoubleComplex,
        lda: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        beta: *const cuda_types::cublas::cuDoubleComplex,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasZhemv_v2_64`: same parameter list as `cublasZhemv_v2`, but
    // `n`, `lda`, `incx` and `incy` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index double-complex HEMV — confirm.
    #[must_use]
    fn cublasZhemv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        A: *const cuda_types::cublas::cuDoubleComplex,
        lda: i64,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        beta: *const cuda_types::cublas::cuDoubleComplex,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasSsbmv_v2`: `f32` operands with `c_int` parameters `n`, `k`,
    // `lda`, `incx`, `incy`. `y` is the only `*mut` pointer; the status is `#[must_use]`.
    // NOTE(review): presumably the symmetric banded matrix-vector product (SBMV), with `k`
    // the band width — confirm against the cuBLAS docs.
    #[must_use]
    fn cublasSsbmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        k: ::core::ffi::c_int,
        alpha: *const f32,
        A: *const f32,
        lda: ::core::ffi::c_int,
        x: *const f32,
        incx: ::core::ffi::c_int,
        beta: *const f32,
        y: *mut f32,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasSsbmv_v2_64`: same parameter list as `cublasSsbmv_v2`, but
    // `n`, `k`, `lda`, `incx` and `incy` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index single-precision SBMV — confirm.
    #[must_use]
    fn cublasSsbmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        k: i64,
        alpha: *const f32,
        A: *const f32,
        lda: i64,
        x: *const f32,
        incx: i64,
        beta: *const f32,
        y: *mut f32,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasDsbmv_v2`: `f64` operands with `c_int` parameters `n`, `k`,
    // `lda`, `incx`, `incy`. `y` is the only `*mut` pointer; the status is `#[must_use]`.
    // NOTE(review): presumably the double-precision SBMV — confirm.
    #[must_use]
    fn cublasDsbmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        k: ::core::ffi::c_int,
        alpha: *const f64,
        A: *const f64,
        lda: ::core::ffi::c_int,
        x: *const f64,
        incx: ::core::ffi::c_int,
        beta: *const f64,
        y: *mut f64,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasDsbmv_v2_64`: same parameter list as `cublasDsbmv_v2`, but
    // `n`, `k`, `lda`, `incx` and `incy` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index double-precision SBMV — confirm.
    #[must_use]
    fn cublasDsbmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        k: i64,
        alpha: *const f64,
        A: *const f64,
        lda: i64,
        x: *const f64,
        incx: i64,
        beta: *const f64,
        y: *mut f64,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasChbmv_v2`: `cuComplex` operands with `c_int` parameters `n`,
    // `k`, `lda`, `incx`, `incy`. `y` is the only `*mut` pointer; status is `#[must_use]`.
    // NOTE(review): presumably the Hermitian banded matrix-vector product (HBMV) — confirm.
    #[must_use]
    fn cublasChbmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        k: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuComplex,
        A: *const cuda_types::cublas::cuComplex,
        lda: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        beta: *const cuda_types::cublas::cuComplex,
        y: *mut cuda_types::cublas::cuComplex,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasChbmv_v2_64`: same parameter list as `cublasChbmv_v2`, but
    // `n`, `k`, `lda`, `incx` and `incy` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index single-complex HBMV — confirm.
    #[must_use]
    fn cublasChbmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        k: i64,
        alpha: *const cuda_types::cublas::cuComplex,
        A: *const cuda_types::cublas::cuComplex,
        lda: i64,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        beta: *const cuda_types::cublas::cuComplex,
        y: *mut cuda_types::cublas::cuComplex,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasZhbmv_v2`: `cuDoubleComplex` operands with `c_int` parameters
    // `n`, `k`, `lda`, `incx`, `incy`. `y` is the only `*mut` pointer; status is `#[must_use]`.
    // NOTE(review): presumably the double-complex HBMV — confirm.
    #[must_use]
    fn cublasZhbmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        k: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        A: *const cuda_types::cublas::cuDoubleComplex,
        lda: ::core::ffi::c_int,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        beta: *const cuda_types::cublas::cuDoubleComplex,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasZhbmv_v2_64`: same parameter list as `cublasZhbmv_v2`, but
    // `n`, `k`, `lda`, `incx` and `incy` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index double-complex HBMV — confirm.
    #[must_use]
    fn cublasZhbmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        k: i64,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        A: *const cuda_types::cublas::cuDoubleComplex,
        lda: i64,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        beta: *const cuda_types::cublas::cuDoubleComplex,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasSspmv_v2`: `f32` operands with `c_int` parameters. The matrix
    // argument is `AP` and there is no leading-dimension parameter. `y` is the only `*mut`
    // pointer; the status is `#[must_use]`.
    // NOTE(review): presumably the symmetric packed matrix-vector product (SPMV) — confirm.
    #[must_use]
    fn cublasSspmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const f32,
        AP: *const f32,
        x: *const f32,
        incx: ::core::ffi::c_int,
        beta: *const f32,
        y: *mut f32,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasSspmv_v2_64`: same parameter list as `cublasSspmv_v2`, but
    // `n`, `incx` and `incy` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index single-precision SPMV — confirm.
    #[must_use]
    fn cublasSspmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const f32,
        AP: *const f32,
        x: *const f32,
        incx: i64,
        beta: *const f32,
        y: *mut f32,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasDspmv_v2`: `f64` operands with `c_int` parameters; matrix
    // argument `AP` has no leading-dimension parameter. `y` is the only `*mut` pointer.
    // NOTE(review): presumably the double-precision SPMV — confirm.
    #[must_use]
    fn cublasDspmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const f64,
        AP: *const f64,
        x: *const f64,
        incx: ::core::ffi::c_int,
        beta: *const f64,
        y: *mut f64,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasDspmv_v2_64`: same parameter list as `cublasDspmv_v2`, but
    // `n`, `incx` and `incy` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index double-precision SPMV — confirm.
    #[must_use]
    fn cublasDspmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const f64,
        AP: *const f64,
        x: *const f64,
        incx: i64,
        beta: *const f64,
        y: *mut f64,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasChpmv_v2`: `cuComplex` operands with `c_int` parameters; matrix
    // argument `AP` has no leading-dimension parameter. `y` is the only `*mut` pointer.
    // NOTE(review): presumably the Hermitian packed matrix-vector product (HPMV) — confirm.
    #[must_use]
    fn cublasChpmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuComplex,
        AP: *const cuda_types::cublas::cuComplex,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        beta: *const cuda_types::cublas::cuComplex,
        y: *mut cuda_types::cublas::cuComplex,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasChpmv_v2_64`: same parameter list as `cublasChpmv_v2`, but
    // `n`, `incx` and `incy` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index single-complex HPMV — confirm.
    #[must_use]
    fn cublasChpmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const cuda_types::cublas::cuComplex,
        AP: *const cuda_types::cublas::cuComplex,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        beta: *const cuda_types::cublas::cuComplex,
        y: *mut cuda_types::cublas::cuComplex,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasZhpmv_v2`: `cuDoubleComplex` operands with `c_int` parameters;
    // matrix argument `AP` has no leading-dimension parameter. `y` is the only `*mut` pointer.
    // NOTE(review): presumably the double-complex HPMV — confirm.
    #[must_use]
    fn cublasZhpmv_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        AP: *const cuda_types::cublas::cuDoubleComplex,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        beta: *const cuda_types::cublas::cuDoubleComplex,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasZhpmv_v2_64`: same parameter list as `cublasZhpmv_v2`, but
    // `n`, `incx` and `incy` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index double-complex HPMV — confirm.
    #[must_use]
    fn cublasZhpmv_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        AP: *const cuda_types::cublas::cuDoubleComplex,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        beta: *const cuda_types::cublas::cuDoubleComplex,
        y: *mut cuda_types::cublas::cuDoubleComplex,
        incy: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasSger_v2`: `f32` operands with `c_int` parameters `m`, `n`,
    // `incx`, `incy`, `lda`. `x` and `y` are `*const`; the matrix `A` is the only `*mut`
    // pointer. The returned status is `#[must_use]`.
    // NOTE(review): presumably the general rank-1 update (GER) — confirm.
    #[must_use]
    fn cublasSger_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        alpha: *const f32,
        x: *const f32,
        incx: ::core::ffi::c_int,
        y: *const f32,
        incy: ::core::ffi::c_int,
        A: *mut f32,
        lda: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasSger_v2_64`: same parameter list as `cublasSger_v2`, but
    // `m`, `n`, `incx`, `incy` and `lda` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index single-precision GER — confirm.
    #[must_use]
    fn cublasSger_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        m: i64,
        n: i64,
        alpha: *const f32,
        x: *const f32,
        incx: i64,
        y: *const f32,
        incy: i64,
        A: *mut f32,
        lda: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasDger_v2`: `f64` operands with `c_int` parameters. `x` and `y`
    // are `*const`; the matrix `A` is the only `*mut` pointer. Status is `#[must_use]`.
    // NOTE(review): presumably the double-precision GER — confirm.
    #[must_use]
    fn cublasDger_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        alpha: *const f64,
        x: *const f64,
        incx: ::core::ffi::c_int,
        y: *const f64,
        incy: ::core::ffi::c_int,
        A: *mut f64,
        lda: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasDger_v2_64`: same parameter list as `cublasDger_v2`, but
    // `m`, `n`, `incx`, `incy` and `lda` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index double-precision GER — confirm.
    #[must_use]
    fn cublasDger_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        m: i64,
        n: i64,
        alpha: *const f64,
        x: *const f64,
        incx: i64,
        y: *const f64,
        incy: i64,
        A: *mut f64,
        lda: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasCgeru_v2`: `cuComplex` operands with `c_int` parameters. `x` and
    // `y` are `*const`; the matrix `A` is the only `*mut` pointer. Status is `#[must_use]`.
    // NOTE(review): presumably the unconjugated complex rank-1 update (GERU) — confirm.
    #[must_use]
    fn cublasCgeru_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuComplex,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        y: *const cuda_types::cublas::cuComplex,
        incy: ::core::ffi::c_int,
        A: *mut cuda_types::cublas::cuComplex,
        lda: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasCgeru_v2_64`: same parameter list as `cublasCgeru_v2`, but
    // `m`, `n`, `incx`, `incy` and `lda` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index single-complex GERU — confirm.
    #[must_use]
    fn cublasCgeru_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        m: i64,
        n: i64,
        alpha: *const cuda_types::cublas::cuComplex,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        y: *const cuda_types::cublas::cuComplex,
        incy: i64,
        A: *mut cuda_types::cublas::cuComplex,
        lda: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasCgerc_v2`: identical parameter types to `cublasCgeru_v2`
    // (`cuComplex` operands, `c_int` sizes, `A` as the only `*mut` pointer).
    // NOTE(review): presumably the conjugated complex rank-1 update (GERC) — confirm.
    #[must_use]
    fn cublasCgerc_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuComplex,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        y: *const cuda_types::cublas::cuComplex,
        incy: ::core::ffi::c_int,
        A: *mut cuda_types::cublas::cuComplex,
        lda: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasCgerc_v2_64`: same parameter list as `cublasCgerc_v2`, but
    // `m`, `n`, `incx`, `incy` and `lda` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index single-complex GERC — confirm.
    #[must_use]
    fn cublasCgerc_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        m: i64,
        n: i64,
        alpha: *const cuda_types::cublas::cuComplex,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        y: *const cuda_types::cublas::cuComplex,
        incy: i64,
        A: *mut cuda_types::cublas::cuComplex,
        lda: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasZgeru_v2`: `cuDoubleComplex` operands with `c_int` parameters.
    // `x` and `y` are `*const`; the matrix `A` is the only `*mut` pointer.
    // NOTE(review): presumably the double-complex GERU (unconjugated) — confirm.
    #[must_use]
    fn cublasZgeru_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        y: *const cuda_types::cublas::cuDoubleComplex,
        incy: ::core::ffi::c_int,
        A: *mut cuda_types::cublas::cuDoubleComplex,
        lda: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasZgeru_v2_64`: same parameter list as `cublasZgeru_v2`, but
    // `m`, `n`, `incx`, `incy` and `lda` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index double-complex GERU — confirm.
    #[must_use]
    fn cublasZgeru_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        m: i64,
        n: i64,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        y: *const cuda_types::cublas::cuDoubleComplex,
        incy: i64,
        A: *mut cuda_types::cublas::cuDoubleComplex,
        lda: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasZgerc_v2`: identical parameter types to `cublasZgeru_v2`
    // (`cuDoubleComplex` operands, `c_int` sizes, `A` as the only `*mut` pointer).
    // NOTE(review): presumably the double-complex GERC (conjugated) — confirm.
    #[must_use]
    fn cublasZgerc_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        y: *const cuda_types::cublas::cuDoubleComplex,
        incy: ::core::ffi::c_int,
        A: *mut cuda_types::cublas::cuDoubleComplex,
        lda: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasZgerc_v2_64`: same parameter list as `cublasZgerc_v2`, but
    // `m`, `n`, `incx`, `incy` and `lda` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index double-complex GERC — confirm.
    #[must_use]
    fn cublasZgerc_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        m: i64,
        n: i64,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        y: *const cuda_types::cublas::cuDoubleComplex,
        incy: i64,
        A: *mut cuda_types::cublas::cuDoubleComplex,
        lda: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasSsyr_v2`: `f32` operands with `c_int` parameters `n`, `incx`,
    // `lda`. `x` is `*const`; the matrix `A` is the only `*mut` pointer. Status is
    // `#[must_use]`.
    // NOTE(review): presumably the symmetric rank-1 update (SYR) — confirm.
    #[must_use]
    fn cublasSsyr_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const f32,
        x: *const f32,
        incx: ::core::ffi::c_int,
        A: *mut f32,
        lda: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasSsyr_v2_64`: same parameter list as `cublasSsyr_v2`, but
    // `n`, `incx` and `lda` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index single-precision SYR — confirm.
    #[must_use]
    fn cublasSsyr_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const f32,
        x: *const f32,
        incx: i64,
        A: *mut f32,
        lda: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasDsyr_v2`: `f64` operands with `c_int` parameters. `x` is
    // `*const`; the matrix `A` is the only `*mut` pointer. Status is `#[must_use]`.
    // NOTE(review): presumably the double-precision SYR — confirm.
    #[must_use]
    fn cublasDsyr_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const f64,
        x: *const f64,
        incx: ::core::ffi::c_int,
        A: *mut f64,
        lda: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasDsyr_v2_64`: same parameter list as `cublasDsyr_v2`, but
    // `n`, `incx` and `lda` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index double-precision SYR — confirm.
    #[must_use]
    fn cublasDsyr_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const f64,
        x: *const f64,
        incx: i64,
        A: *mut f64,
        lda: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasCsyr_v2`: `cuComplex` operands (including `alpha`) with `c_int`
    // parameters. `x` is `*const`; the matrix `A` is the only `*mut` pointer.
    // NOTE(review): presumably the single-complex symmetric rank-1 update (SYR) — confirm.
    #[must_use]
    fn cublasCsyr_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuComplex,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        A: *mut cuda_types::cublas::cuComplex,
        lda: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasCsyr_v2_64`: same parameter list as `cublasCsyr_v2`, but
    // `n`, `incx` and `lda` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index single-complex SYR — confirm.
    #[must_use]
    fn cublasCsyr_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const cuda_types::cublas::cuComplex,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        A: *mut cuda_types::cublas::cuComplex,
        lda: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasZsyr_v2`: `cuDoubleComplex` operands (including `alpha`) with
    // `c_int` parameters. `x` is `*const`; the matrix `A` is the only `*mut` pointer.
    // NOTE(review): presumably the double-complex SYR — confirm.
    #[must_use]
    fn cublasZsyr_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        A: *mut cuda_types::cublas::cuDoubleComplex,
        lda: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasZsyr_v2_64`: same parameter list as `cublasZsyr_v2`, but
    // `n`, `incx` and `lda` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index double-complex SYR — confirm.
    #[must_use]
    fn cublasZsyr_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        A: *mut cuda_types::cublas::cuDoubleComplex,
        lda: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasCher_v2`: `x` and `A` are `cuComplex`, while `alpha` is a
    // `*const f32` (real scalar) — unlike `cublasCsyr_v2`, where `alpha` is complex.
    // Sizes are `c_int`; `A` is the only `*mut` pointer. Status is `#[must_use]`.
    // NOTE(review): presumably the Hermitian rank-1 update (HER) — confirm.
    #[must_use]
    fn cublasCher_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const f32,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        A: *mut cuda_types::cublas::cuComplex,
        lda: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasCher_v2_64`: same parameter list as `cublasCher_v2` (real
    // `*const f32` `alpha`, `cuComplex` `x`/`A`), but `n`, `incx` and `lda` are `i64`.
    // NOTE(review): presumably the 64-bit-index single-complex HER — confirm.
    #[must_use]
    fn cublasCher_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const f32,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        A: *mut cuda_types::cublas::cuComplex,
        lda: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasZher_v2`: `x` and `A` are `cuDoubleComplex`, while `alpha` is a
    // `*const f64` (real scalar). Sizes are `c_int`; `A` is the only `*mut` pointer.
    // NOTE(review): presumably the double-complex Hermitian rank-1 update (HER) — confirm.
    #[must_use]
    fn cublasZher_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const f64,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        A: *mut cuda_types::cublas::cuDoubleComplex,
        lda: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasZher_v2_64`: same parameter list as `cublasZher_v2` (real
    // `*const f64` `alpha`, `cuDoubleComplex` `x`/`A`), but `n`, `incx` and `lda` are `i64`.
    // NOTE(review): presumably the 64-bit-index double-complex HER — confirm.
    #[must_use]
    fn cublasZher_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const f64,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        A: *mut cuda_types::cublas::cuDoubleComplex,
        lda: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasSspr_v2`: `f32` operands with `c_int` parameters `n`, `incx`.
    // The matrix argument `AP` is the only `*mut` pointer and has no leading-dimension
    // parameter. Status is `#[must_use]`.
    // NOTE(review): presumably the symmetric packed rank-1 update (SPR) — confirm.
    #[must_use]
    fn cublasSspr_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const f32,
        x: *const f32,
        incx: ::core::ffi::c_int,
        AP: *mut f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasSspr_v2_64`: same parameter list as `cublasSspr_v2`, but
    // `n` and `incx` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index single-precision SPR — confirm.
    #[must_use]
    fn cublasSspr_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const f32,
        x: *const f32,
        incx: i64,
        AP: *mut f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasDspr_v2`: `f64` operands with `c_int` parameters `n`, `incx`.
    // The matrix argument `AP` is the only `*mut` pointer; no leading-dimension parameter.
    // NOTE(review): presumably the double-precision SPR — confirm.
    #[must_use]
    fn cublasDspr_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const f64,
        x: *const f64,
        incx: ::core::ffi::c_int,
        AP: *mut f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasDspr_v2_64`: same parameter list as `cublasDspr_v2`, but
    // `n` and `incx` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index double-precision SPR — confirm.
    #[must_use]
    fn cublasDspr_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const f64,
        x: *const f64,
        incx: i64,
        AP: *mut f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasChpr_v2`: `x` and `AP` are `cuComplex`, while `alpha` is a
    // `*const f32` (real scalar). Sizes are `c_int`; `AP` is the only `*mut` pointer.
    // NOTE(review): presumably the Hermitian packed rank-1 update (HPR) — confirm.
    #[must_use]
    fn cublasChpr_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const f32,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        AP: *mut cuda_types::cublas::cuComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasChpr_v2_64`: same parameter list as `cublasChpr_v2` (real
    // `*const f32` `alpha`), but `n` and `incx` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index single-complex HPR — confirm.
    #[must_use]
    fn cublasChpr_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const f32,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        AP: *mut cuda_types::cublas::cuComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasZhpr_v2`: `x` and `AP` are `cuDoubleComplex`, while `alpha` is a
    // `*const f64` (real scalar). Sizes are `c_int`; `AP` is the only `*mut` pointer.
    // NOTE(review): presumably the double-complex HPR — confirm.
    #[must_use]
    fn cublasZhpr_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const f64,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        AP: *mut cuda_types::cublas::cuDoubleComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasZhpr_v2_64`: same parameter list as `cublasZhpr_v2` (real
    // `*const f64` `alpha`), but `n` and `incx` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index double-complex HPR — confirm.
    #[must_use]
    fn cublasZhpr_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const f64,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        AP: *mut cuda_types::cublas::cuDoubleComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasSsyr2_v2`: `f32` operands with `c_int` parameters `n`, `incx`,
    // `incy`, `lda`. `x` and `y` are `*const`; the matrix `A` is the only `*mut` pointer.
    // Status is `#[must_use]`.
    // NOTE(review): presumably the symmetric rank-2 update (SYR2) — confirm.
    #[must_use]
    fn cublasSsyr2_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const f32,
        x: *const f32,
        incx: ::core::ffi::c_int,
        y: *const f32,
        incy: ::core::ffi::c_int,
        A: *mut f32,
        lda: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasSsyr2_v2_64`: same parameter list as `cublasSsyr2_v2`, but
    // `n`, `incx`, `incy` and `lda` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index single-precision SYR2 — confirm.
    #[must_use]
    fn cublasSsyr2_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const f32,
        x: *const f32,
        incx: i64,
        y: *const f32,
        incy: i64,
        A: *mut f32,
        lda: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasDsyr2_v2`: `f64` operands with `c_int` parameters. `x` and `y`
    // are `*const`; the matrix `A` is the only `*mut` pointer. Status is `#[must_use]`.
    // NOTE(review): presumably the double-precision SYR2 — confirm.
    #[must_use]
    fn cublasDsyr2_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const f64,
        x: *const f64,
        incx: ::core::ffi::c_int,
        y: *const f64,
        incy: ::core::ffi::c_int,
        A: *mut f64,
        lda: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasDsyr2_v2_64`: same parameter list as `cublasDsyr2_v2`, but
    // `n`, `incx`, `incy` and `lda` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index double-precision SYR2 — confirm.
    #[must_use]
    fn cublasDsyr2_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const f64,
        x: *const f64,
        incx: i64,
        y: *const f64,
        incy: i64,
        A: *mut f64,
        lda: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasCsyr2_v2`: `cuComplex` operands with `c_int` parameters. `x` and
    // `y` are `*const`; the matrix `A` is the only `*mut` pointer. Status is `#[must_use]`.
    // NOTE(review): presumably the single-complex SYR2 — confirm.
    #[must_use]
    fn cublasCsyr2_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuComplex,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        y: *const cuda_types::cublas::cuComplex,
        incy: ::core::ffi::c_int,
        A: *mut cuda_types::cublas::cuComplex,
        lda: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasCsyr2_v2_64`: same parameter list as `cublasCsyr2_v2`, but
    // `n`, `incx`, `incy` and `lda` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index single-complex SYR2 — confirm.
    #[must_use]
    fn cublasCsyr2_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const cuda_types::cublas::cuComplex,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        y: *const cuda_types::cublas::cuComplex,
        incy: i64,
        A: *mut cuda_types::cublas::cuComplex,
        lda: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasZsyr2_v2`: `cuDoubleComplex` operands with `c_int` parameters.
    // `x` and `y` are `*const`; the matrix `A` is the only `*mut` pointer.
    // NOTE(review): presumably the double-complex SYR2 — confirm.
    #[must_use]
    fn cublasZsyr2_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        y: *const cuda_types::cublas::cuDoubleComplex,
        incy: ::core::ffi::c_int,
        A: *mut cuda_types::cublas::cuDoubleComplex,
        lda: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasZsyr2_v2_64`: same parameter list as `cublasZsyr2_v2`, but
    // `n`, `incx`, `incy` and `lda` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index double-complex SYR2 — confirm.
    #[must_use]
    fn cublasZsyr2_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        y: *const cuda_types::cublas::cuDoubleComplex,
        incy: i64,
        A: *mut cuda_types::cublas::cuDoubleComplex,
        lda: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasCher2_v2`: all operands, including `alpha`, are `cuComplex`
    // (contrast with `cublasCher_v2`, whose `alpha` is `*const f32`). Sizes are `c_int`;
    // the matrix `A` is the only `*mut` pointer. Status is `#[must_use]`.
    // NOTE(review): presumably the Hermitian rank-2 update (HER2) — confirm.
    #[must_use]
    fn cublasCher2_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuComplex,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        y: *const cuda_types::cublas::cuComplex,
        incy: ::core::ffi::c_int,
        A: *mut cuda_types::cublas::cuComplex,
        lda: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasCher2_v2_64`: same parameter list as `cublasCher2_v2`, but
    // `n`, `incx`, `incy` and `lda` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index single-complex HER2 — confirm.
    #[must_use]
    fn cublasCher2_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const cuda_types::cublas::cuComplex,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        y: *const cuda_types::cublas::cuComplex,
        incy: i64,
        A: *mut cuda_types::cublas::cuComplex,
        lda: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasZher2_v2`: all operands, including `alpha`, are
    // `cuDoubleComplex`. Sizes are `c_int`; the matrix `A` is the only `*mut` pointer.
    // NOTE(review): presumably the double-complex HER2 — confirm.
    #[must_use]
    fn cublasZher2_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        y: *const cuda_types::cublas::cuDoubleComplex,
        incy: ::core::ffi::c_int,
        A: *mut cuda_types::cublas::cuDoubleComplex,
        lda: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasZher2_v2_64`: same parameter list as `cublasZher2_v2`, but
    // `n`, `incx`, `incy` and `lda` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index double-complex HER2 — confirm.
    #[must_use]
    fn cublasZher2_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        y: *const cuda_types::cublas::cuDoubleComplex,
        incy: i64,
        A: *mut cuda_types::cublas::cuDoubleComplex,
        lda: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasSspr2_v2`: `f32` operands with `c_int` parameters `n`, `incx`,
    // `incy`. `x` and `y` are `*const`; the matrix argument `AP` is the only `*mut` pointer
    // and has no leading-dimension parameter. Status is `#[must_use]`.
    // NOTE(review): presumably the symmetric packed rank-2 update (SPR2) — confirm.
    #[must_use]
    fn cublasSspr2_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const f32,
        x: *const f32,
        incx: ::core::ffi::c_int,
        y: *const f32,
        incy: ::core::ffi::c_int,
        AP: *mut f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasSspr2_v2_64`: same parameter list as `cublasSspr2_v2`, but
    // `n`, `incx` and `incy` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index single-precision SPR2 — confirm.
    #[must_use]
    fn cublasSspr2_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const f32,
        x: *const f32,
        incx: i64,
        y: *const f32,
        incy: i64,
        AP: *mut f32,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasDspr2_v2`: `f64` operands with `c_int` parameters. `x` and `y`
    // are `*const`; `AP` is the only `*mut` pointer, with no leading-dimension parameter.
    // NOTE(review): presumably the double-precision SPR2 — confirm.
    #[must_use]
    fn cublasDspr2_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const f64,
        x: *const f64,
        incx: ::core::ffi::c_int,
        y: *const f64,
        incy: ::core::ffi::c_int,
        AP: *mut f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasDspr2_v2_64`: same parameter list as `cublasDspr2_v2`, but
    // `n`, `incx` and `incy` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index double-precision SPR2 — confirm.
    #[must_use]
    fn cublasDspr2_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const f64,
        x: *const f64,
        incx: i64,
        y: *const f64,
        incy: i64,
        AP: *mut f64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasChpr2_v2`: all operands, including `alpha`, are `cuComplex`
    // (contrast with `cublasChpr_v2`, whose `alpha` is `*const f32`). Sizes are `c_int`;
    // `AP` is the only `*mut` pointer. Status is `#[must_use]`.
    // NOTE(review): presumably the Hermitian packed rank-2 update (HPR2) — confirm.
    #[must_use]
    fn cublasChpr2_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuComplex,
        x: *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        y: *const cuda_types::cublas::cuComplex,
        incy: ::core::ffi::c_int,
        AP: *mut cuda_types::cublas::cuComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasChpr2_v2_64`: same parameter list as `cublasChpr2_v2`, but
    // `n`, `incx` and `incy` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index single-complex HPR2 — confirm.
    #[must_use]
    fn cublasChpr2_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const cuda_types::cublas::cuComplex,
        x: *const cuda_types::cublas::cuComplex,
        incx: i64,
        y: *const cuda_types::cublas::cuComplex,
        incy: i64,
        AP: *mut cuda_types::cublas::cuComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasZhpr2_v2`: all operands, including `alpha`, are
    // `cuDoubleComplex`. Sizes are `c_int`; `AP` is the only `*mut` pointer.
    // NOTE(review): presumably the double-complex HPR2 — confirm.
    #[must_use]
    fn cublasZhpr2_v2(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        y: *const cuda_types::cublas::cuDoubleComplex,
        incy: ::core::ffi::c_int,
        AP: *mut cuda_types::cublas::cuDoubleComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasZhpr2_v2_64`: same parameter list as `cublasZhpr2_v2`, but
    // `n`, `incx` and `incy` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index double-complex HPR2 — confirm.
    #[must_use]
    fn cublasZhpr2_v2_64(
        handle: cuda_types::cublas::cublasHandle_t,
        uplo: cuda_types::cublas::cublasFillMode_t,
        n: i64,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        x: *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        y: *const cuda_types::cublas::cuDoubleComplex,
        incy: i64,
        AP: *mut cuda_types::cublas::cuDoubleComplex,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasSgemvBatched`: `f32` operands with `c_int` parameters,
    // including `batchCount`. `Aarray` and `xarray` are `*const *const f32` and `yarray` is
    // `*const *mut f32`, i.e. pointers to pointers. Status is `#[must_use]`.
    // NOTE(review): presumably a batched matrix-vector product where each `*array` holds
    // `batchCount` per-problem pointers — confirm against the cuBLAS docs.
    #[must_use]
    fn cublasSgemvBatched(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        alpha: *const f32,
        Aarray: *const *const f32,
        lda: ::core::ffi::c_int,
        xarray: *const *const f32,
        incx: ::core::ffi::c_int,
        beta: *const f32,
        yarray: *const *mut f32,
        incy: ::core::ffi::c_int,
        batchCount: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasSgemvBatched_64`: same parameter list as `cublasSgemvBatched`,
    // but `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index single-precision batched GEMV — confirm.
    #[must_use]
    fn cublasSgemvBatched_64(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: i64,
        n: i64,
        alpha: *const f32,
        Aarray: *const *const f32,
        lda: i64,
        xarray: *const *const f32,
        incx: i64,
        beta: *const f32,
        yarray: *const *mut f32,
        incy: i64,
        batchCount: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasDgemvBatched`: `f64` operands with `c_int` parameters,
    // including `batchCount`. `Aarray`/`xarray` are `*const *const f64` and `yarray` is
    // `*const *mut f64`. Status is `#[must_use]`.
    // NOTE(review): presumably the double-precision batched GEMV — confirm.
    #[must_use]
    fn cublasDgemvBatched(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        alpha: *const f64,
        Aarray: *const *const f64,
        lda: ::core::ffi::c_int,
        xarray: *const *const f64,
        incx: ::core::ffi::c_int,
        beta: *const f64,
        yarray: *const *mut f64,
        incy: ::core::ffi::c_int,
        batchCount: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Declaration of `cublasDgemvBatched_64`: same parameter list as `cublasDgemvBatched`,
    // but `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are `i64` instead of `c_int`.
    // NOTE(review): presumably the 64-bit-index double-precision batched GEMV — confirm.
    #[must_use]
    fn cublasDgemvBatched_64(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: i64,
        n: i64,
        alpha: *const f64,
        Aarray: *const *const f64,
        lda: i64,
        xarray: *const *const f64,
        incx: i64,
        beta: *const f64,
        yarray: *const *mut f64,
        incy: i64,
        batchCount: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration of `cublasCgemvBatched` (no body in this file).
    // Every data argument uses `cuda_types::cublas::cuComplex`: `alpha`/`beta` are
    // single `*const` pointers, `Aarray`/`xarray`/`yarray` are pointer-to-pointer,
    // and only `yarray` points at mutable data (`*const *mut cuComplex`).
    // `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are C `int` in this variant.
    // NOTE(review): presumably the single-precision complex batched GEMV — semantics
    // are not visible here, confirm against the cuBLAS docs.
    // `#[must_use]` flags the returned `cublasStatus_t` as a value not to be discarded.
    #[must_use]
    fn cublasCgemvBatched(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuComplex,
        Aarray: *const *const cuda_types::cublas::cuComplex,
        lda: ::core::ffi::c_int,
        xarray: *const *const cuda_types::cublas::cuComplex,
        incx: ::core::ffi::c_int,
        beta: *const cuda_types::cublas::cuComplex,
        yarray: *const *mut cuda_types::cublas::cuComplex,
        incy: ::core::ffi::c_int,
        batchCount: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration of `cublasCgemvBatched_64`.
    // Same parameter names, order and `cuComplex` pointer types as
    // `cublasCgemvBatched`, but `m`, `n`, `lda`, `incx`, `incy` and `batchCount`
    // are `i64` instead of C `int`.
    // NOTE(review): presumably the wide-index entry point of the same operation —
    // confirm against the cuBLAS 64-bit integer interface documentation.
    // `#[must_use]` flags the returned `cublasStatus_t` as a value not to be discarded.
    #[must_use]
    fn cublasCgemvBatched_64(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: i64,
        n: i64,
        alpha: *const cuda_types::cublas::cuComplex,
        Aarray: *const *const cuda_types::cublas::cuComplex,
        lda: i64,
        xarray: *const *const cuda_types::cublas::cuComplex,
        incx: i64,
        beta: *const cuda_types::cublas::cuComplex,
        yarray: *const *mut cuda_types::cublas::cuComplex,
        incy: i64,
        batchCount: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration of `cublasZgemvBatched` (no body in this file).
    // Every data argument uses `cuda_types::cublas::cuDoubleComplex`: `alpha`/`beta`
    // are single `*const` pointers, `Aarray`/`xarray`/`yarray` are pointer-to-pointer,
    // and only `yarray` points at mutable data (`*const *mut cuDoubleComplex`).
    // `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are C `int` in this variant.
    // NOTE(review): presumably the double-precision complex batched GEMV — semantics
    // are not visible here, confirm against the cuBLAS docs.
    // `#[must_use]` flags the returned `cublasStatus_t` as a value not to be discarded.
    #[must_use]
    fn cublasZgemvBatched(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        Aarray: *const *const cuda_types::cublas::cuDoubleComplex,
        lda: ::core::ffi::c_int,
        xarray: *const *const cuda_types::cublas::cuDoubleComplex,
        incx: ::core::ffi::c_int,
        beta: *const cuda_types::cublas::cuDoubleComplex,
        yarray: *const *mut cuda_types::cublas::cuDoubleComplex,
        incy: ::core::ffi::c_int,
        batchCount: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration of `cublasZgemvBatched_64`.
    // Same parameter names, order and `cuDoubleComplex` pointer types as
    // `cublasZgemvBatched`, but `m`, `n`, `lda`, `incx`, `incy` and `batchCount`
    // are `i64` instead of C `int`.
    // NOTE(review): presumably the wide-index entry point of the same operation —
    // confirm against the cuBLAS 64-bit integer interface documentation.
    // `#[must_use]` flags the returned `cublasStatus_t` as a value not to be discarded.
    #[must_use]
    fn cublasZgemvBatched_64(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: i64,
        n: i64,
        alpha: *const cuda_types::cublas::cuDoubleComplex,
        Aarray: *const *const cuda_types::cublas::cuDoubleComplex,
        lda: i64,
        xarray: *const *const cuda_types::cublas::cuDoubleComplex,
        incx: i64,
        beta: *const cuda_types::cublas::cuDoubleComplex,
        yarray: *const *mut cuda_types::cublas::cuDoubleComplex,
        incy: i64,
        batchCount: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration of `cublasHSHgemvBatched` (no body in this file).
    // Mixed types: `alpha`/`beta` are `*const f32`, while `Aarray`, `xarray` and
    // `yarray` are pointer-to-pointer over `cuda_types::cublas::__half`; only
    // `yarray` points at mutable data (`*const *mut __half`).
    // `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are C `int` in this variant.
    // NOTE(review): presumably a batched GEMV with half-precision storage and f32
    // scalars — semantics are not visible here, confirm against the cuBLAS docs.
    // `#[must_use]` flags the returned `cublasStatus_t` as a value not to be discarded.
    #[must_use]
    fn cublasHSHgemvBatched(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        alpha: *const f32,
        Aarray: *const *const cuda_types::cublas::__half,
        lda: ::core::ffi::c_int,
        xarray: *const *const cuda_types::cublas::__half,
        incx: ::core::ffi::c_int,
        beta: *const f32,
        yarray: *const *mut cuda_types::cublas::__half,
        incy: ::core::ffi::c_int,
        batchCount: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration of `cublasHSHgemvBatched_64`.
    // Same parameter names, order and pointer types as `cublasHSHgemvBatched`
    // (f32 scalars, `__half` for `Aarray`/`xarray`/`yarray`), but `m`, `n`, `lda`,
    // `incx`, `incy` and `batchCount` are `i64` instead of C `int`.
    // NOTE(review): presumably the wide-index entry point of the same operation —
    // confirm against the cuBLAS 64-bit integer interface documentation.
    // `#[must_use]` flags the returned `cublasStatus_t` as a value not to be discarded.
    #[must_use]
    fn cublasHSHgemvBatched_64(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: i64,
        n: i64,
        alpha: *const f32,
        Aarray: *const *const cuda_types::cublas::__half,
        lda: i64,
        xarray: *const *const cuda_types::cublas::__half,
        incx: i64,
        beta: *const f32,
        yarray: *const *mut cuda_types::cublas::__half,
        incy: i64,
        batchCount: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration of `cublasHSSgemvBatched` (no body in this file).
    // Mixed types: `alpha`/`beta` are `*const f32`; `Aarray` and `xarray` are
    // pointer-to-pointer over `cuda_types::cublas::__half`, whereas `yarray` is
    // `*const *mut f32` — the one difference from `cublasHSHgemvBatched`.
    // `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are C `int` in this variant.
    // NOTE(review): presumably a batched GEMV reading half-precision data and writing
    // f32 results — semantics are not visible here, confirm against the cuBLAS docs.
    // `#[must_use]` flags the returned `cublasStatus_t` as a value not to be discarded.
    #[must_use]
    fn cublasHSSgemvBatched(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        alpha: *const f32,
        Aarray: *const *const cuda_types::cublas::__half,
        lda: ::core::ffi::c_int,
        xarray: *const *const cuda_types::cublas::__half,
        incx: ::core::ffi::c_int,
        beta: *const f32,
        yarray: *const *mut f32,
        incy: ::core::ffi::c_int,
        batchCount: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration of `cublasHSSgemvBatched_64`.
    // Same parameter names, order and pointer types as `cublasHSSgemvBatched`
    // (f32 scalars, `__half` for `Aarray`/`xarray`, `f32` for `yarray`), but `m`, `n`,
    // `lda`, `incx`, `incy` and `batchCount` are `i64` instead of C `int`.
    // NOTE(review): presumably the wide-index entry point of the same operation —
    // confirm against the cuBLAS 64-bit integer interface documentation.
    // `#[must_use]` flags the returned `cublasStatus_t` as a value not to be discarded.
    #[must_use]
    fn cublasHSSgemvBatched_64(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: i64,
        n: i64,
        alpha: *const f32,
        Aarray: *const *const cuda_types::cublas::__half,
        lda: i64,
        xarray: *const *const cuda_types::cublas::__half,
        incx: i64,
        beta: *const f32,
        yarray: *const *mut f32,
        incy: i64,
        batchCount: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration of `cublasTSTgemvBatched` (no body in this file).
    // Mixed types: `alpha`/`beta` are `*const f32`, while `Aarray`, `xarray` and
    // `yarray` are pointer-to-pointer over `cuda_types::cublas::__nv_bfloat16`; only
    // `yarray` points at mutable data (`*const *mut __nv_bfloat16`).
    // `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are C `int` in this variant.
    // NOTE(review): presumably a batched GEMV with bfloat16 storage and f32 scalars —
    // semantics are not visible here, confirm against the cuBLAS docs.
    // `#[must_use]` flags the returned `cublasStatus_t` as a value not to be discarded.
    #[must_use]
    fn cublasTSTgemvBatched(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        alpha: *const f32,
        Aarray: *const *const cuda_types::cublas::__nv_bfloat16,
        lda: ::core::ffi::c_int,
        xarray: *const *const cuda_types::cublas::__nv_bfloat16,
        incx: ::core::ffi::c_int,
        beta: *const f32,
        yarray: *const *mut cuda_types::cublas::__nv_bfloat16,
        incy: ::core::ffi::c_int,
        batchCount: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration of `cublasTSTgemvBatched_64`.
    // Same parameter names, order and pointer types as `cublasTSTgemvBatched`
    // (f32 scalars, `__nv_bfloat16` for `Aarray`/`xarray`/`yarray`), but `m`, `n`,
    // `lda`, `incx`, `incy` and `batchCount` are `i64` instead of C `int`.
    // NOTE(review): presumably the wide-index entry point of the same operation —
    // confirm against the cuBLAS 64-bit integer interface documentation.
    // `#[must_use]` flags the returned `cublasStatus_t` as a value not to be discarded.
    #[must_use]
    fn cublasTSTgemvBatched_64(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: i64,
        n: i64,
        alpha: *const f32,
        Aarray: *const *const cuda_types::cublas::__nv_bfloat16,
        lda: i64,
        xarray: *const *const cuda_types::cublas::__nv_bfloat16,
        incx: i64,
        beta: *const f32,
        yarray: *const *mut cuda_types::cublas::__nv_bfloat16,
        incy: i64,
        batchCount: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration of `cublasTSSgemvBatched` (no body in this file).
    // Mixed types: `alpha`/`beta` are `*const f32`; `Aarray` and `xarray` are
    // pointer-to-pointer over `cuda_types::cublas::__nv_bfloat16`, whereas `yarray`
    // is `*const *mut f32` — the one difference from `cublasTSTgemvBatched`.
    // `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are C `int` in this variant.
    // NOTE(review): presumably a batched GEMV reading bfloat16 data and writing f32
    // results — semantics are not visible here, confirm against the cuBLAS docs.
    // `#[must_use]` flags the returned `cublasStatus_t` as a value not to be discarded.
    #[must_use]
    fn cublasTSSgemvBatched(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        alpha: *const f32,
        Aarray: *const *const cuda_types::cublas::__nv_bfloat16,
        lda: ::core::ffi::c_int,
        xarray: *const *const cuda_types::cublas::__nv_bfloat16,
        incx: ::core::ffi::c_int,
        beta: *const f32,
        yarray: *const *mut f32,
        incy: ::core::ffi::c_int,
        batchCount: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration of `cublasTSSgemvBatched_64`.
    // Same parameter names, order and pointer types as `cublasTSSgemvBatched`
    // (f32 scalars, `__nv_bfloat16` for `Aarray`/`xarray`, `f32` for `yarray`), but
    // `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are `i64` instead of C `int`.
    // NOTE(review): presumably the wide-index entry point of the same operation —
    // confirm against the cuBLAS 64-bit integer interface documentation.
    // `#[must_use]` flags the returned `cublasStatus_t` as a value not to be discarded.
    #[must_use]
    fn cublasTSSgemvBatched_64(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: i64,
        n: i64,
        alpha: *const f32,
        Aarray: *const *const cuda_types::cublas::__nv_bfloat16,
        lda: i64,
        xarray: *const *const cuda_types::cublas::__nv_bfloat16,
        incx: i64,
        beta: *const f32,
        yarray: *const *mut f32,
        incy: i64,
        batchCount: i64,
    ) -> cuda_types::cublas::cublasStatus_t;
    // Signature-only declaration of `cublasSgemvStridedBatched` (no body in this file).
    // Unlike the `*gemvBatched` declarations, `A`, `x` and `y` are single-level `f32`
    // pointers (`y` is `*mut`), each followed by its own stride argument
    // (`strideA`, `stridex`, `stridey`) typed as C `long long`.
    // `m`, `n`, `lda`, `incx`, `incy` and `batchCount` remain C `int`.
    // NOTE(review): presumably each stride is the element offset between consecutive
    // batch entries in one contiguous allocation — not visible here, confirm against
    // the cuBLAS docs.
    // `#[must_use]` flags the returned `cublasStatus_t` as a value not to be discarded.
    #[must_use]
    fn cublasSgemvStridedBatched(
        handle: cuda_types::cublas::cublasHandle_t,
        trans: cuda_types::cublas::cublasOperation_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        alpha: *const f32,
        A: *const f32,
        lda: ::core::ffi::c_int,
        strideA: ::core::ffi::c_longlong,
        x: *const f32,
        incx: ::core::ffi::c_int,
        stridex: ::core::ffi::c_longlong,
        beta: *const f32,
        y: *mut f32,
        incy: ::core::ffi::c_int,
        stridey: ::core::ffi::c_longlong,
        batchCount: ::core::ffi::c_int,
    ) -> cuda_types::cublas::cublasStatus_t;
    #[must_use]
    fn cublasSgemvStridedBatched_64(
        handle: cuda_types::cubl
Download .txt
gitextract_ehz427hu/

├── .cargo/
│   └── config.toml
├── .devcontainer/
│   ├── Dockerfile
│   └── devcontainer.json
├── .git-blame-ignore-revs
├── .gitattributes
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── config.yml
│   │   └── zluda_dump.yml
│   └── workflows/
│       ├── move_tests.sh
│       ├── nightly_tests.yml
│       ├── pr_master.yml
│       ├── push_master.yml
│       ├── rocm_setup_build.sh
│       ├── rocm_setup_run.sh
│       └── trigger_nightly_tests.yml
├── .gitignore
├── .gitmodules
├── .rustfmt.toml
├── Cargo.toml
├── LICENSE-APACHE
├── LICENSE-MIT
├── README.md
├── compiler/
│   ├── Cargo.toml
│   └── src/
│       ├── error.rs
│       └── main.rs
├── cuda_check/
│   ├── Cargo.toml
│   └── src/
│       ├── main.rs
│       └── win.rs
├── cuda_macros/
│   ├── .rustfmt.toml
│   ├── Cargo.toml
│   ├── build/
│   │   └── wrapper.h
│   └── src/
│       ├── cublas.rs
│       ├── cublaslt.rs
│       ├── cublaslt_internal.rs
│       ├── cuda.rs
│       ├── cudnn8.rs
│       ├── cudnn9.rs
│       ├── cufft.rs
│       ├── cusparse.rs
│       ├── lib.rs
│       └── nvml.rs
├── cuda_types/
│   ├── .rustfmt.toml
│   ├── Cargo.toml
│   └── src/
│       ├── cublas.rs
│       ├── cublaslt.rs
│       ├── cuda.rs
│       ├── cudnn.rs
│       ├── cudnn8.rs
│       ├── cudnn9.rs
│       ├── cufft.rs
│       ├── cusparse.rs
│       ├── dark_api.rs
│       ├── lib.rs
│       └── nvml.rs
├── dark_api/
│   ├── Cargo.toml
│   └── src/
│       ├── fatbin.rs
│       └── lib.rs
├── detours-sys/
│   ├── Cargo.toml
│   ├── LICENSE-APACHE
│   ├── LICENSE-MIT
│   ├── README.md
│   ├── build/
│   │   └── wrapper.h
│   ├── build.rs
│   └── src/
│       ├── bundled_bindings.rs
│       └── lib.rs
├── docs/
│   ├── .gitignore
│   ├── .readthedocs.yaml
│   ├── book.toml
│   └── src/
│       ├── SUMMARY.md
│       ├── building.md
│       ├── faq.md
│       ├── hip_sdk.md
│       ├── llama_cpp.md
│       ├── precompiling.md
│       ├── quick_start.md
│       └── troubleshooting.md
├── ext/
│   ├── detours/
│   │   ├── .github/
│   │   │   ├── ISSUE_TEMPLATE/
│   │   │   │   ├── bug-report.md
│   │   │   │   └── question.md
│   │   │   ├── PULL_REQUEST_TEMPLATE/
│   │   │   │   └── pull_request_template.md
│   │   │   ├── codeql/
│   │   │   │   └── codeql-config.yml
│   │   │   └── workflows/
│   │   │       └── main.yml
│   │   ├── .gitignore
│   │   ├── CREDITS.TXT
│   │   ├── LICENSE.md
│   │   ├── Makefile
│   │   ├── README.md
│   │   ├── samples/
│   │   │   ├── Makefile
│   │   │   ├── README.TXT
│   │   │   ├── comeasy/
│   │   │   │   ├── Makefile
│   │   │   │   ├── comeasy.cpp
│   │   │   │   ├── wrotei.cpp
│   │   │   │   └── wrotei.rc
│   │   │   ├── commem/
│   │   │   │   ├── Makefile
│   │   │   │   └── commem.cpp
│   │   │   ├── common.mak
│   │   │   ├── cping/
│   │   │   │   ├── Makefile
│   │   │   │   ├── ReadMe.Txt
│   │   │   │   ├── cping.cpp
│   │   │   │   └── iping.idl
│   │   │   ├── disas/
│   │   │   │   ├── Makefile
│   │   │   │   ├── arm.asm
│   │   │   │   ├── disas.cpp
│   │   │   │   ├── ia64.asm
│   │   │   │   ├── unk.cpp
│   │   │   │   ├── x64.asm
│   │   │   │   └── x86.cpp
│   │   │   ├── dtest/
│   │   │   │   ├── Makefile
│   │   │   │   ├── NORMAL_IA64.TXT
│   │   │   │   ├── NORMAL_X64.TXT
│   │   │   │   ├── NORMAL_X86.TXT
│   │   │   │   ├── dtarge.cpp
│   │   │   │   ├── dtarge.h
│   │   │   │   ├── dtarge.rc
│   │   │   │   └── dtest.cpp
│   │   │   ├── dumpe/
│   │   │   │   ├── Makefile
│   │   │   │   └── dumpe.cpp
│   │   │   ├── dumpi/
│   │   │   │   ├── Makefile
│   │   │   │   └── dumpi.cpp
│   │   │   ├── dynamic_alloc/
│   │   │   │   ├── Makefile
│   │   │   │   ├── main.cpp
│   │   │   │   ├── x64.asm
│   │   │   │   └── x86.asm
│   │   │   ├── echo/
│   │   │   │   ├── Makefile
│   │   │   │   ├── echofx.cpp
│   │   │   │   ├── echofx.rc
│   │   │   │   ├── echonul.cpp
│   │   │   │   └── main.cpp
│   │   │   ├── einst/
│   │   │   │   ├── Makefile
│   │   │   │   ├── edll1x.cpp
│   │   │   │   ├── edll2x.cpp
│   │   │   │   ├── edll3x.cpp
│   │   │   │   └── einst.cpp
│   │   │   ├── excep/
│   │   │   │   ├── Makefile
│   │   │   │   ├── excep.cpp
│   │   │   │   ├── firstexc.cpp
│   │   │   │   └── firstexc.h
│   │   │   ├── findfunc/
│   │   │   │   ├── Makefile
│   │   │   │   ├── extend.cpp
│   │   │   │   ├── extend.rc
│   │   │   │   ├── findfunc.cpp
│   │   │   │   ├── symtest.cpp
│   │   │   │   ├── target.cpp
│   │   │   │   ├── target.h
│   │   │   │   └── target.rc
│   │   │   ├── impmunge/
│   │   │   │   ├── Makefile
│   │   │   │   └── impmunge.cpp
│   │   │   ├── member/
│   │   │   │   ├── Makefile
│   │   │   │   └── member.cpp
│   │   │   ├── opengl/
│   │   │   │   ├── Makefile
│   │   │   │   ├── ogldet.cpp
│   │   │   │   ├── ogldet.rc
│   │   │   │   └── testogl.cpp
│   │   │   ├── region/
│   │   │   │   ├── Makefile
│   │   │   │   └── region.cpp
│   │   │   ├── setdll/
│   │   │   │   ├── Makefile
│   │   │   │   └── setdll.cpp
│   │   │   ├── simple/
│   │   │   │   ├── Makefile
│   │   │   │   ├── simple.cpp
│   │   │   │   ├── simple.rc
│   │   │   │   └── sleep5.cpp
│   │   │   ├── slept/
│   │   │   │   ├── Makefile
│   │   │   │   ├── NORMAL_IA64.TXT
│   │   │   │   ├── NORMAL_X64.TXT
│   │   │   │   ├── NORMAL_X86.TXT
│   │   │   │   ├── dslept.cpp
│   │   │   │   ├── dslept.rc
│   │   │   │   ├── sleepbed.cpp
│   │   │   │   ├── sleepnew.cpp
│   │   │   │   ├── sleepold.cpp
│   │   │   │   ├── slept.cpp
│   │   │   │   ├── slept.h
│   │   │   │   ├── slept.rc
│   │   │   │   └── verify.cpp
│   │   │   ├── syelog/
│   │   │   │   ├── Makefile
│   │   │   │   ├── sltest.cpp
│   │   │   │   ├── sltestp.cpp
│   │   │   │   ├── syelog.cpp
│   │   │   │   ├── syelog.h
│   │   │   │   └── syelogd.cpp
│   │   │   ├── talloc/
│   │   │   │   ├── Makefile
│   │   │   │   ├── NORMAL_IA64.TXT
│   │   │   │   ├── NORMAL_X64.TXT
│   │   │   │   ├── talloc.cpp
│   │   │   │   ├── tdll1x.cpp
│   │   │   │   ├── tdll2x.cpp
│   │   │   │   ├── tdll3x.cpp
│   │   │   │   ├── tdll4x.cpp
│   │   │   │   ├── tdll5x.cpp
│   │   │   │   ├── tdll6x.cpp
│   │   │   │   ├── tdll7x.cpp
│   │   │   │   ├── tdll8x.cpp
│   │   │   │   └── tdll9x.cpp
│   │   │   ├── traceapi/
│   │   │   │   ├── Makefile
│   │   │   │   ├── _win32.cpp
│   │   │   │   ├── testapi.cpp
│   │   │   │   ├── trcapi.cpp
│   │   │   │   └── trcapi.rc
│   │   │   ├── tracebld/
│   │   │   │   ├── Makefile
│   │   │   │   ├── tracebld.cpp
│   │   │   │   ├── tracebld.h
│   │   │   │   ├── trcbld.cpp
│   │   │   │   └── trcbld.rc
│   │   │   ├── tracelnk/
│   │   │   │   ├── Makefile
│   │   │   │   ├── trclnk.cpp
│   │   │   │   └── trclnk.rc
│   │   │   ├── tracemem/
│   │   │   │   ├── Makefile
│   │   │   │   ├── trcmem.cpp
│   │   │   │   └── trcmem.rc
│   │   │   ├── tracereg/
│   │   │   │   ├── Makefile
│   │   │   │   ├── trcreg.cpp
│   │   │   │   └── trcreg.rc
│   │   │   ├── traceser/
│   │   │   │   ├── Makefile
│   │   │   │   ├── trcser.cpp
│   │   │   │   └── trcser.rc
│   │   │   ├── tracessl/
│   │   │   │   ├── Makefile
│   │   │   │   ├── trcssl.cpp
│   │   │   │   └── trcssl.rc
│   │   │   ├── tracetcp/
│   │   │   │   ├── Makefile
│   │   │   │   ├── trctcp.cpp
│   │   │   │   └── trctcp.rc
│   │   │   ├── tryman/
│   │   │   │   ├── Makefile
│   │   │   │   ├── managed.cs
│   │   │   │   ├── size.cpp
│   │   │   │   ├── tryman.cpp
│   │   │   │   ├── tstman.cpp
│   │   │   │   └── tstman.rc
│   │   │   └── withdll/
│   │   │       ├── Makefile
│   │   │       └── withdll.cpp
│   │   ├── src/
│   │   │   ├── Makefile
│   │   │   ├── creatwth.cpp
│   │   │   ├── detours.cpp
│   │   │   ├── detours.h
│   │   │   ├── detver.h
│   │   │   ├── disasm.cpp
│   │   │   ├── disolarm.cpp
│   │   │   ├── disolarm64.cpp
│   │   │   ├── disolia64.cpp
│   │   │   ├── disolx64.cpp
│   │   │   ├── disolx86.cpp
│   │   │   ├── image.cpp
│   │   │   ├── modules.cpp
│   │   │   └── uimports.cpp
│   │   ├── system.mak
│   │   ├── tests/
│   │   │   ├── Makefile
│   │   │   ├── catch.hpp
│   │   │   ├── corruptor.cpp
│   │   │   ├── corruptor.h
│   │   │   ├── main.cpp
│   │   │   ├── test_image_api.cpp
│   │   │   └── test_module_api.cpp
│   │   └── vc/
│   │       ├── Detours.sln
│   │       ├── Detours.vcxproj
│   │       └── Detours.vcxproj.filters
│   ├── highs-sys/
│   │   ├── Cargo.toml
│   │   ├── README.md
│   │   ├── build.rs
│   │   ├── install-dependencies.sh
│   │   ├── src/
│   │   │   ├── c_bindings.rs
│   │   │   └── lib.rs
│   │   ├── tests/
│   │   │   ├── test_highs_call.rs
│   │   │   └── test_highs_functions.rs
│   │   └── wrapper.h
│   ├── hip_runtime-sys/
│   │   ├── .rustfmt.toml
│   │   ├── Cargo.toml
│   │   ├── build.rs
│   │   └── src/
│   │       └── lib.rs
│   ├── hipblaslt-sys/
│   │   ├── .rustfmt.toml
│   │   ├── Cargo.toml
│   │   ├── build.rs
│   │   └── src/
│   │       └── lib.rs
│   ├── miopen-sys/
│   │   ├── .rustfmt.toml
│   │   ├── Cargo.toml
│   │   ├── build.rs
│   │   └── src/
│   │       └── lib.rs
│   ├── rocblas-sys/
│   │   ├── .rustfmt.toml
│   │   ├── Cargo.toml
│   │   ├── build.rs
│   │   └── src/
│   │       └── lib.rs
│   ├── rocm_smi-sys/
│   │   ├── .rustfmt.toml
│   │   ├── Cargo.toml
│   │   ├── build.rs
│   │   └── src/
│   │       └── lib.rs
│   └── rocsparse-sys/
│       ├── .rustfmt.toml
│       ├── Cargo.toml
│       ├── build.rs
│       └── src/
│           └── lib.rs
├── format/
│   ├── .rustfmt.toml
│   ├── Cargo.toml
│   └── src/
│       ├── dark_api.rs
│       ├── dnn8.rs
│       ├── dnn9.rs
│       ├── format_generated.rs
│       ├── format_generated_blas.rs
│       ├── format_generated_blaslt.rs
│       ├── format_generated_blaslt_internal.rs
│       ├── format_generated_dnn8.rs
│       ├── format_generated_dnn9.rs
│       ├── format_generated_fft.rs
│       ├── format_generated_nvml.rs
│       ├── format_generated_sparse.rs
│       └── lib.rs
├── llvm_zluda/
│   ├── Cargo.toml
│   ├── build.rs
│   └── src/
│       ├── compile.rs
│       ├── device-libs/
│       │   ├── LICENSE.TXT
│       │   ├── README.md
│       │   ├── ockl.bc
│       │   └── ocml.bc
│       ├── ffi.rs
│       ├── lib.cpp
│       ├── lib.rs
│       └── utils.rs
├── ptx/
│   ├── Cargo.toml
│   ├── lib/
│   │   ├── zluda_ptx_impl.bc
│   │   └── zluda_ptx_impl.cpp
│   └── src/
│       ├── lib.rs
│       ├── pass/
│       │   ├── deparamize_functions.rs
│       │   ├── expand_operands.rs
│       │   ├── fix_special_registers.rs
│       │   ├── hoist_globals.rs
│       │   ├── insert_explicit_load_store.rs
│       │   ├── insert_implicit_conversions.rs
│       │   ├── insert_post_saturation.rs
│       │   ├── instruction_mode_to_global_mode/
│       │   │   ├── call_with_mode.ptx
│       │   │   ├── fold_denormal.ptx
│       │   │   ├── mod.rs
│       │   │   └── test.rs
│       │   ├── llvm/
│       │   │   ├── attributes.rs
│       │   │   ├── emit.rs
│       │   │   └── mod.rs
│       │   ├── mod.rs
│       │   ├── normalize_basic_blocks.rs
│       │   ├── normalize_identifiers.rs
│       │   ├── normalize_predicates.rs
│       │   ├── remove_unreachable_basic_blocks.rs
│       │   ├── replace_instructions_with_functions.rs
│       │   ├── replace_instructions_with_functions_fp_required.rs
│       │   ├── replace_known_functions.rs
│       │   ├── resolve_function_pointers.rs
│       │   └── test/
│       │       ├── expand_operands/
│       │       │   ├── immediate_conversion.ptx
│       │       │   ├── immediates.ptx
│       │       │   ├── mod.rs
│       │       │   ├── vector_extract.ptx
│       │       │   ├── vector_operand.ptx
│       │       │   └── vector_operand_convert.ptx
│       │       ├── insert_implicit_conversions/
│       │       │   ├── default.ptx
│       │       │   ├── default_reg_b32_reg_f16x2.ptx
│       │       │   ├── default_reg_b32_reg_v2_b16.ptx
│       │       │   ├── default_relaxed.ptx
│       │       │   └── mod.rs
│       │       ├── instruction_mode_to_global_mode/
│       │       │   ├── mod.rs
│       │       │   └── mode_conflict.ptx
│       │       ├── mod.rs
│       │       └── normalize_basic_blocks/
│       │           ├── mod.rs
│       │           └── trap.ptx
│       └── test/
│           ├── _Z9vectorAddPKfS0_Pfi.ptx
│           ├── ll/
│           │   ├── _attributes.ll
│           │   ├── abs.ll
│           │   ├── activemask.ll
│           │   ├── add.ll
│           │   ├── add_extended.ll
│           │   ├── add_ftz.ll
│           │   ├── add_non_coherent.ll
│           │   ├── add_s32_sat.ll
│           │   ├── add_tuning.ll
│           │   ├── addc_cc_s32.ll
│           │   ├── and.ll
│           │   ├── assertfail.ll
│           │   ├── atom_add.ll
│           │   ├── atom_add_float.ll
│           │   ├── atom_cas.ll
│           │   ├── atom_inc.ll
│           │   ├── b64tof64.ll
│           │   ├── bar_red_and_pred.ll
│           │   ├── bench.ll
│           │   ├── bfe.ll
│           │   ├── bfi.ll
│           │   ├── block.ll
│           │   ├── bmsk_clamp_b32.ll
│           │   ├── bra.ll
│           │   ├── brev.ll
│           │   ├── call.ll
│           │   ├── call_rnd.ll
│           │   ├── clz.ll
│           │   ├── const.ll
│           │   ├── const_ident.ll
│           │   ├── constant_f32.ll
│           │   ├── constant_negative.ll
│           │   ├── copysign.ll
│           │   ├── cos.ll
│           │   ├── cp_async.ll
│           │   ├── createpolicy.ll
│           │   ├── cvt_f16x2_f32.ll
│           │   ├── cvt_f64_f32.ll
│           │   ├── cvt_pack.ll
│           │   ├── cvt_relu_f16x2_f32.ll
│           │   ├── cvt_rn_bf16x2_f32.ll
│           │   ├── cvt_rn_f16x2_e4m3x2.ll
│           │   ├── cvt_rn_f16x2_e5m2x2.ll
│           │   ├── cvt_rn_satfinite_e4m3x2_f32.ll
│           │   ├── cvt_rn_satfinite_e5m2x2_f32.ll
│           │   ├── cvt_rni.ll
│           │   ├── cvt_rni_u16_f32.ll
│           │   ├── cvt_rzi.ll
│           │   ├── cvt_s16_s8.ll
│           │   ├── cvt_s32_f32.ll
│           │   ├── cvt_s64_s32.ll
│           │   ├── cvt_sat_s_u.ll
│           │   ├── cvta.ll
│           │   ├── div_approx.ll
│           │   ├── div_ftz.ll
│           │   ├── div_noftz.ll
│           │   ├── dp2a.ll
│           │   ├── dp4a.ll
│           │   ├── ex2.ll
│           │   ├── extern_func.ll
│           │   ├── extern_shared.ll
│           │   ├── extern_shared_call.ll
│           │   ├── fma.ll
│           │   ├── fma_bf16x2.ll
│           │   ├── fma_f16x2.ll
│           │   ├── fmax.ll
│           │   ├── global_array.ll
│           │   ├── global_array_f32.ll
│           │   ├── lanemask_le.ll
│           │   ├── lanemask_lt.ll
│           │   ├── ld_st.ll
│           │   ├── ld_st_implicit.ll
│           │   ├── ld_st_offset.ll
│           │   ├── ldmatrix.ll
│           │   ├── ldmatrix_trans.ll
│           │   ├── lg2.ll
│           │   ├── local_align.ll
│           │   ├── mad_extended.ll
│           │   ├── mad_s32.ll
│           │   ├── mad_wide.ll
│           │   ├── malformed_label.ll
│           │   ├── max.ll
│           │   ├── membar.ll
│           │   ├── min.ll
│           │   ├── min_f16.ll
│           │   ├── min_nan_f16.ll
│           │   ├── mma_m16n8k16_f32_bf16_bf16_f32.ll
│           │   ├── mma_m16n8k16_f32_bf16_bf16_f32_2x.ll
│           │   ├── mma_m16n8k16_f32_f16_f16_f32.ll
│           │   ├── mma_m16n8k32_s32_s8_s8_s32.ll
│           │   ├── mma_m16n8k32_s32_s8_s8_s32_interleave.ll
│           │   ├── mov.ll
│           │   ├── mov_address.ll
│           │   ├── mul24_hi_s32.ll
│           │   ├── mul24_hi_u32.ll
│           │   ├── mul24_lo_s32.ll
│           │   ├── mul24_lo_u32.ll
│           │   ├── mul_ftz.ll
│           │   ├── mul_hi.ll
│           │   ├── mul_lo.ll
│           │   ├── mul_non_ftz.ll
│           │   ├── mul_wide.ll
│           │   ├── multiple_return.ll
│           │   ├── nanosleep.ll
│           │   ├── neg.ll
│           │   ├── non_scalar_ptr_offset.ll
│           │   ├── noreturn.ll
│           │   ├── not.ll
│           │   ├── ntid.ll
│           │   ├── or.ll
│           │   ├── param_is_addressable.ll
│           │   ├── popc.ll
│           │   ├── pred_not.ll
│           │   ├── prmt.ll
│           │   ├── prmt_slow.ll
│           │   ├── rcp.ll
│           │   ├── redux_sync_add_u32_partial.ll
│           │   ├── redux_sync_op_s32.ll
│           │   ├── redux_sync_op_u32.ll
│           │   ├── reg_local.ll
│           │   ├── reg_multi.ll
│           │   ├── rem.ll
│           │   ├── rsqrt.ll
│           │   ├── sad_s64.ll
│           │   ├── selp.ll
│           │   ├── selp_true.ll
│           │   ├── set_f16.ll
│           │   ├── setp.ll
│           │   ├── setp_gt.ll
│           │   ├── setp_leu.ll
│           │   ├── setp_nan.ll
│           │   ├── setp_num.ll
│           │   ├── shared_ptr_32.ll
│           │   ├── shared_ptr_take_address.ll
│           │   ├── shared_unify_extern.ll
│           │   ├── shared_unify_local.ll
│           │   ├── shared_variable.ll
│           │   ├── shf_l.ll
│           │   ├── shf_l_clamp.ll
│           │   ├── shf_l_wrap.ll
│           │   ├── shf_r.ll
│           │   ├── shf_r_clamp.ll
│           │   ├── shf_r_wrap.ll
│           │   ├── shfl_sync_bfly_b32_pred.ll
│           │   ├── shfl_sync_down_b32_pred.ll
│           │   ├── shfl_sync_idx_b32_pred.ll
│           │   ├── shfl_sync_mode_b32.ll
│           │   ├── shfl_sync_up_b32_pred.ll
│           │   ├── shl.ll
│           │   ├── shr.ll
│           │   ├── shr_oob.ll
│           │   ├── sign_extend.ll
│           │   ├── sin.ll
│           │   ├── sqrt.ll
│           │   ├── sqrt_rn_ftz.ll
│           │   ├── stateful_ld_st_ntid.ll
│           │   ├── stateful_ld_st_ntid_chain.ll
│           │   ├── stateful_ld_st_ntid_sub.ll
│           │   ├── stateful_ld_st_simple.ll
│           │   ├── stateful_neg_offset.ll
│           │   ├── sub.ll
│           │   ├── sub_extended.ll
│           │   ├── subc_cc_s32.ll
│           │   ├── tanh.ll
│           │   ├── tid.ll
│           │   ├── trap.ll
│           │   ├── uint_to_fp_bf16.ll
│           │   ├── vector.ll
│           │   ├── vector4.ll
│           │   ├── vector8.ll
│           │   ├── vector8_extract.ll
│           │   ├── vector_extract.ll
│           │   ├── vector_operand.ll
│           │   ├── vote_all.ll
│           │   ├── vote_all_sub.ll
│           │   ├── vote_any.ll
│           │   ├── vote_ballot.ll
│           │   ├── warp_sz.ll
│           │   └── xor.ll
│           ├── mod.rs
│           ├── operands.ptx
│           ├── spirv_build/
│           │   ├── bar_sync.ptx
│           │   ├── global_extern_array.ptx
│           │   └── param_func_array_0.ptx
│           ├── spirv_fail/
│           │   ├── const_ptr.ptx
│           │   ├── global_ptr.ptx
│           │   ├── local_ptr.txt
│           │   ├── param_entry_array_0.ptx
│           │   ├── param_vector.ptx
│           │   ├── shared_ptr.ptx
│           │   └── shared_ptr2.ptx
│           ├── spirv_run/
│           │   ├── abs.ptx
│           │   ├── activemask.ptx
│           │   ├── add.ptx
│           │   ├── add_extended.ptx
│           │   ├── add_ftz.ptx
│           │   ├── add_non_coherent.ptx
│           │   ├── add_s32_sat.ptx
│           │   ├── add_tuning.ptx
│           │   ├── addc_cc_s32.ptx
│           │   ├── and.ptx
│           │   ├── assertfail.ptx
│           │   ├── atom_add.ptx
│           │   ├── atom_add_float.ptx
│           │   ├── atom_cas.ptx
│           │   ├── atom_inc.ptx
│           │   ├── atomics_128.ptx
│           │   ├── b64tof64.ptx
│           │   ├── bar_red_and_pred.ptx
│           │   ├── bfe.ptx
│           │   ├── bfi.ptx
│           │   ├── block.ptx
│           │   ├── bmsk_clamp_b32.ptx
│           │   ├── bra.ptx
│           │   ├── brev.ptx
│           │   ├── call.ptx
│           │   ├── call_rnd.ptx
│           │   ├── clz.ptx
│           │   ├── const.ptx
│           │   ├── const_ident.ptx
│           │   ├── constant_f32.ptx
│           │   ├── constant_negative.ptx
│           │   ├── copysign.ptx
│           │   ├── cos.ptx
│           │   ├── cp_async.ptx
│           │   ├── createpolicy.ptx
│           │   ├── cvt_f16x2_f32.ptx
│           │   ├── cvt_f64_f32.ptx
│           │   ├── cvt_pack.ptx
│           │   ├── cvt_relu_f16x2_f32.ptx
│           │   ├── cvt_rn_bf16x2_f32.ptx
│           │   ├── cvt_rn_f16x2_e4m3x2.ptx
│           │   ├── cvt_rn_f16x2_e5m2x2.ptx
│           │   ├── cvt_rn_satfinite_e4m3x2_f32.ptx
│           │   ├── cvt_rn_satfinite_e5m2x2_f32.ptx
│           │   ├── cvt_rni.ptx
│           │   ├── cvt_rni_u16_f32.ptx
│           │   ├── cvt_rzi.ptx
│           │   ├── cvt_s16_s8.ptx
│           │   ├── cvt_s32_f32.ptx
│           │   ├── cvt_s64_s32.ptx
│           │   ├── cvt_sat_s_u.ptx
│           │   ├── cvta.ptx
│           │   ├── div_approx.ptx
│           │   ├── div_ftz.ptx
│           │   ├── div_noftz.ptx
│           │   ├── dp2a.ptx
│           │   ├── dp4a.ptx
│           │   ├── ex2.ptx
│           │   ├── extern_func.ptx
│           │   ├── extern_shared.ptx
│           │   ├── extern_shared_call.ptx
│           │   ├── fma.ptx
│           │   ├── fma_bf16x2.ptx
│           │   ├── fma_f16x2.ptx
│           │   ├── fmax.ptx
│           │   ├── func_ptr.ptx
│           │   ├── global_array.ptx
│           │   ├── global_array_f32.ptx
│           │   ├── implicit_param.ptx
│           │   ├── lanemask_lt.ptx
│           │   ├── ld_st.ptx
│           │   ├── ld_st_implicit.ptx
│           │   ├── ld_st_offset.ptx
│           │   ├── ldmatrix.ptx
│           │   ├── ldmatrix_trans.ptx
│           │   ├── lg2.ptx
│           │   ├── local_align.ptx
│           │   ├── mad_extended.ptx
│           │   ├── mad_s32.ptx
│           │   ├── mad_wide.ptx
│           │   ├── malformed_label.ptx
│           │   ├── max.ptx
│           │   ├── membar.ptx
│           │   ├── min.ptx
│           │   ├── min_f16.ptx
│           │   ├── min_nan_f16.ptx
│           │   ├── mma_m16n8k16_f32_bf16_bf16_f32.ptx
│           │   ├── mma_m16n8k16_f32_bf16_bf16_f32_2x.ptx
│           │   ├── mma_m16n8k16_f32_f16_f16_f32.ptx
│           │   ├── mma_m16n8k32_s32_s8_s8_s32.ptx
│           │   ├── mma_m16n8k32_s32_s8_s8_s32_interleave.ptx
│           │   ├── mod.rs
│           │   ├── mov.ptx
│           │   ├── mov_address.ptx
│           │   ├── mul24_hi_s32.ptx
│           │   ├── mul24_hi_u32.ptx
│           │   ├── mul24_lo_s32.ptx
│           │   ├── mul24_lo_u32.ptx
│           │   ├── mul_ftz.ptx
│           │   ├── mul_hi.ptx
│           │   ├── mul_lo.ptx
│           │   ├── mul_non_ftz.ptx
│           │   ├── mul_wide.ptx
│           │   ├── multiple_return.ptx
│           │   ├── nanosleep.ptx
│           │   ├── neg.ptx
│           │   ├── non_scalar_ptr_offset.ptx
│           │   ├── noreturn.ptx
│           │   ├── not.ptx
│           │   ├── ntid.ptx
│           │   ├── or.ptx
│           │   ├── param_is_addressable.ptx
│           │   ├── popc.ptx
│           │   ├── pred_not.ptx
│           │   ├── prmt.ptx
│           │   ├── prmt_slow.ptx
│           │   ├── rcp.ptx
│           │   ├── redux_sync_add_u32_partial.ptx
│           │   ├── redux_sync_op_s32.ptx
│           │   ├── redux_sync_op_u32.ptx
│           │   ├── reg_local.ptx
│           │   ├── reg_multi.ptx
│           │   ├── rem.ptx
│           │   ├── rsqrt.ptx
│           │   ├── sad_s64.ptx
│           │   ├── selp.ptx
│           │   ├── selp_true.ptx
│           │   ├── set_f16.ptx
│           │   ├── setp.ptx
│           │   ├── setp_gt.ptx
│           │   ├── setp_leu.ptx
│           │   ├── setp_nan.ptx
│           │   ├── setp_num.ptx
│           │   ├── shared_ptr_32.ptx
│           │   ├── shared_ptr_take_address.ptx
│           │   ├── shared_unify_extern.ptx
│           │   ├── shared_unify_local.ptx
│           │   ├── shared_variable.ptx
│           │   ├── shf_l.ptx
│           │   ├── shf_l_clamp.ptx
│           │   ├── shf_l_wrap.ptx
│           │   ├── shf_r.ptx
│           │   ├── shf_r_clamp.ptx
│           │   ├── shf_r_wrap.ptx
│           │   ├── shfl_sync_bfly_b32_pred.ptx
│           │   ├── shfl_sync_down_b32_pred.ptx
│           │   ├── shfl_sync_idx_b32_pred.ptx
│           │   ├── shfl_sync_mode_b32.ptx
│           │   ├── shfl_sync_up_b32_pred.ptx
│           │   ├── shl.ptx
│           │   ├── shr.ptx
│           │   ├── shr_oob.ptx
│           │   ├── sign_extend.ptx
│           │   ├── sin.ptx
│           │   ├── sqrt.ptx
│           │   ├── sqrt_rn_ftz.ptx
│           │   ├── stateful_ld_st_ntid.ptx
│           │   ├── stateful_ld_st_ntid_chain.ptx
│           │   ├── stateful_ld_st_ntid_sub.ptx
│           │   ├── stateful_ld_st_simple.ptx
│           │   ├── stateful_neg_offset.ptx
│           │   ├── sub.ptx
│           │   ├── sub_extended.ptx
│           │   ├── subc_cc_s32.ptx
│           │   ├── tanh.ptx
│           │   ├── tid.ptx
│           │   ├── trap.ptx
│           │   ├── uint_to_fp_bf16.ptx
│           │   ├── vector.ptx
│           │   ├── vector4.ptx
│           │   ├── vector8.ptx
│           │   ├── vector8_extract.ptx
│           │   ├── vector_extract.ptx
│           │   ├── vector_operand.ptx
│           │   ├── verify.py
│           │   ├── vote_all.ptx
│           │   ├── vote_all_sub.ptx
│           │   ├── vote_any.ptx
│           │   ├── vote_ballot.ptx
│           │   ├── warp_sz.ptx
│           │   └── xor.ptx
│           ├── vectorAdd_11.ptx
│           └── vectorAdd_kernel64.ptx
├── ptx_parser/
│   ├── Cargo.toml
│   └── src/
│       ├── ast.rs
│       ├── check_args.py
│       └── lib.rs
├── ptx_parser_macros/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
├── ptx_parser_macros_impl/
│   ├── Cargo.toml
│   └── src/
│       ├── lib.rs
│       └── parser.rs
├── ptxas/
│   ├── Cargo.toml
│   └── src/
│       └── main.rs
├── xtask/
│   ├── Cargo.toml
│   └── src/
│       └── main.rs
├── zluda/
│   ├── Cargo.toml
│   ├── build.rs
│   └── src/
│       ├── impl/
│       │   ├── context.rs
│       │   ├── device.rs
│       │   ├── driver.rs
│       │   ├── event.rs
│       │   ├── function.rs
│       │   ├── graph.rs
│       │   ├── hipfix.rs
│       │   ├── kernel.rs
│       │   ├── library.rs
│       │   ├── memory.rs
│       │   ├── mod.rs
│       │   ├── module.rs
│       │   ├── os_unix.rs
│       │   ├── os_win.rs
│       │   ├── pointer.rs
│       │   └── stream.rs
│       ├── lib.rs
│       ├── os_unix.rs
│       ├── os_win.rs
│       └── tests.rs
├── zluda_bindgen/
│   ├── Cargo.toml
│   ├── build/
│   │   ├── cublasLt_internal.h
│   │   ├── cublas_wrapper.h
│   │   ├── cuda_wrapper.h
│   │   ├── cudnn_v8/
│   │   │   ├── cudnn_adv_infer.h
│   │   │   ├── cudnn_adv_train.h
│   │   │   ├── cudnn_backend.h
│   │   │   ├── cudnn_cnn_infer.h
│   │   │   ├── cudnn_cnn_train.h
│   │   │   ├── cudnn_ops_infer.h
│   │   │   ├── cudnn_ops_train.h
│   │   │   └── cudnn_version.h
│   │   ├── cufft_wraper.h
│   │   └── decompile_cublaslt_internal.py
│   └── src/
│       ├── main.rs
│       └── process_table.rs
├── zluda_blas/
│   ├── Cargo.toml
│   ├── build.rs
│   └── src/
│       ├── impl.rs
│       ├── lib.rs
│       └── tests.rs
├── zluda_blaslt/
│   ├── Cargo.toml
│   ├── build.rs
│   └── src/
│       ├── impl.rs
│       └── lib.rs
├── zluda_cache/
│   ├── Cargo.toml
│   ├── diesel.toml
│   ├── migrations/
│   │   ├── .keep
│   │   └── 2025-08-04-203347_create_initial/
│   │       ├── down.sql
│   │       └── up.sql
│   └── src/
│       ├── lib.rs
│       ├── models.rs
│       └── schema.rs
├── zluda_common/
│   ├── Cargo.toml
│   └── src/
│       ├── constants.rs
│       ├── lib.rs
│       ├── os_unix.rs
│       └── os_win.rs
├── zluda_dnn/
│   ├── Cargo.toml
│   └── src/
│       ├── impl.rs
│       └── lib.rs
├── zluda_dnn8/
│   ├── Cargo.toml
│   ├── build.rs
│   └── src/
│       └── lib.rs
├── zluda_dnn9/
│   ├── Cargo.toml
│   ├── build.rs
│   └── src/
│       ├── lib.rs
│       └── tests.rs
├── zluda_fft/
│   ├── Cargo.toml
│   └── src/
│       ├── impl.rs
│       └── lib.rs
├── zluda_inject/
│   ├── Cargo.toml
│   ├── build.rs
│   ├── src/
│   │   ├── args.rs
│   │   ├── bin.rs
│   │   ├── main.rs
│   │   └── win.rs
│   └── tests/
│       ├── helpers/
│       │   ├── direct_cuinit.rs
│       │   ├── do_cuinit.rs
│       │   ├── do_cuinit_early.rs
│       │   ├── do_cuinit_late.rs
│       │   ├── do_cuinit_late_clr.cs
│       │   ├── indirect_cuinit.rs
│       │   └── subprocess.rs
│       └── inject.rs
├── zluda_ld/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
├── zluda_ml/
│   ├── Cargo.toml
│   └── src/
│       ├── impl_common.rs
│       ├── impl_unix.rs
│       ├── impl_win.rs
│       └── lib.rs
├── zluda_precompile/
│   ├── Cargo.toml
│   └── src/
│       └── main.rs
├── zluda_redirect/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
├── zluda_sparse/
│   ├── Cargo.toml
│   ├── build.rs
│   └── src/
│       ├── impl.rs
│       └── lib.rs
├── zluda_trace/
│   ├── Cargo.toml
│   └── src/
│       ├── dark_api.rs
│       ├── lib.rs
│       ├── log.rs
│       ├── os_unix.rs
│       ├── os_win.rs
│       └── trace.rs
├── zluda_trace_blas/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
├── zluda_trace_blaslt/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
├── zluda_trace_common/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
├── zluda_trace_dnn8/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
├── zluda_trace_dnn9/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
├── zluda_trace_fft/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
├── zluda_trace_nvml/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
├── zluda_trace_sparse/
│   ├── Cargo.toml
│   └── src/
│       └── lib.rs
└── zluda_windows/
    ├── Cargo.toml
    ├── library.manifest
    ├── manifest.rc
    └── src/
        └── lib.rs
Copy disabled (too large) Download .json
Condensed preview — 871 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (16,836K chars).
[
  {
    "path": ".cargo/config.toml",
    "chars": 124,
    "preview": "[alias]\r\nxtask = \"run --package xtask --\"\r\n\r\n[target.x86_64-pc-windows-msvc]\r\nrustflags = [\"-Ctarget-feature=+crt-static"
  },
  {
    "path": ".devcontainer/Dockerfile",
    "chars": 2553,
    "preview": "FROM nvidia/cuda:13.0.1-base-ubuntu24.04\n\nRUN DEBIAN_FRONTEND=noninteractive apt update -y && DEBIAN_FRONTEND=noninterac"
  },
  {
    "path": ".devcontainer/devcontainer.json",
    "chars": 843,
    "preview": "// For format details, see https://aka.ms/devcontainer.json. For config options, see the\n// README at: https://github.co"
  },
  {
    "path": ".git-blame-ignore-revs",
    "chars": 41,
    "preview": "21ef5f60a3a5efa17855a30f6b5c7d1968cd46ba\n"
  },
  {
    "path": ".gitattributes",
    "chars": 108,
    "preview": "ext/** linguist-vendored\n*.dll filter=lfs diff=lfs merge=lfs -text\n*.bc filter=lfs diff=lfs merge=lfs -text\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/config.yml",
    "chars": 27,
    "preview": "blank_issues_enabled: true\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/zluda_dump.yml",
    "chars": 2571,
    "preview": "name: Bug Report\ndescription: Report an issue with ZLUDA\nbody:\n  - type: markdown\n    attributes:\n      value: |\n       "
  },
  {
    "path": ".github/workflows/move_tests.sh",
    "chars": 348,
    "preview": "#!/bin/bash\nset -ex\nTEST_EXECUTABLES_DIR=$1\nSUFFIX=$2\n\nls ${TEST_EXECUTABLES_DIR}/* | sort -u | while read -r executable"
  },
  {
    "path": ".github/workflows/nightly_tests.yml",
    "chars": 1767,
    "preview": "name: Nightly tests\non:\n  workflow_call:\n  workflow_dispatch:\n\nenv:\n  ROCM_VERSION: \"6.3.4\"\n  AMDGPU_VERSION: \"6.4.4\"\n  "
  },
  {
    "path": ".github/workflows/pr_master.yml",
    "chars": 4907,
    "preview": "name: ZLUDA\non:\n  pull_request:\n    branches: [ master ]\n\nenv:\n  CARGO_TERM_COLOR: always\n  CARGO_PROFILE: release\n  SCC"
  },
  {
    "path": ".github/workflows/push_master.yml",
    "chars": 6229,
    "preview": "name: ZLUDA\non:\n  workflow_dispatch:\n  push:\n    branches: [ master ]\n\nenv:\n  CARGO_TERM_COLOR: always\n  CARGO_PROFILE: "
  },
  {
    "path": ".github/workflows/rocm_setup_build.sh",
    "chars": 1058,
    "preview": "#!/bin/bash\nset -ex\nROCM_VERSION=$1\n\nDEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends gpg patchelf\n"
  },
  {
    "path": ".github/workflows/rocm_setup_run.sh",
    "chars": 1607,
    "preview": "#!/bin/bash\nset -ex\nROCM_VERSION=$1\nAMDGPU_VERSION=$2\n\nDEBIAN_FRONTEND=noninteractive apt install -y --no-install-recomm"
  },
  {
    "path": ".github/workflows/trigger_nightly_tests.yml",
    "chars": 801,
    "preview": "name: Trigger nightly tests\non:\n  schedule:\n    - cron: \"0 8 * * *\"\n\njobs:\n  check_last_nightly_run:\n    runs-on: 'ubunt"
  },
  {
    "path": ".gitignore",
    "chars": 63,
    "preview": "target/\nCargo.lock\n\n.vscode/\n.idea/\n\nptx/lib/zluda_ptx_impl.ll\n"
  },
  {
    "path": ".gitmodules",
    "chars": 240,
    "preview": "[submodule \"ext/llvm-project\"]\n\tpath = ext/llvm-project\n\turl = https://github.com/vosen/llvm-project.git\n\tbranch = main\n"
  },
  {
    "path": ".rustfmt.toml",
    "chars": 23,
    "preview": "newline_style = \"Unix\"\n"
  },
  {
    "path": "Cargo.toml",
    "chars": 1524,
    "preview": "[workspace]\n\nresolver = \"2\"\n\nmembers = [\n    \"cuda_check\",\n    \"cuda_macros\",\n    \"cuda_types\",\n    \"dark_api\",\n    \"det"
  },
  {
    "path": "LICENSE-APACHE",
    "chars": 9723,
    "preview": "                              Apache License\n                        Version 2.0, January 2004\n                     http"
  },
  {
    "path": "LICENSE-MIT",
    "chars": 1023,
    "preview": "Permission is hereby granted, free of charge, to any\nperson obtaining a copy of this software and associated\ndocumentati"
  },
  {
    "path": "README.md",
    "chars": 718,
    "preview": "ZLUDA is a drop-in replacement for CUDA on non-NVIDIA GPUs. ZLUDA allows running unmodified CUDA applications using non-"
  },
  {
    "path": "compiler/Cargo.toml",
    "chars": 471,
    "preview": "[package]\nname = \"compiler\"\ndescription = \"ZLUDA offline compiler\"\nversion = \"0.0.0\"\nauthors = [\"Joëlle van Essen <joell"
  },
  {
    "path": "compiler/src/error.rs",
    "chars": 1805,
    "preview": "use ptx::TranslateError;\nuse ptx_parser::PtxError;\nuse std::ffi::FromBytesUntilNulError;\nuse std::io;\nuse std::str::Utf8"
  },
  {
    "path": "compiler/src/main.rs",
    "chars": 19415,
    "preview": "use bpaf::Bpaf;\nuse error::CompilerError;\nuse std::ffi::CStr;\nuse std::fs::{self, File};\nuse std::io::{self, Write};\nuse"
  },
  {
    "path": "cuda_check/Cargo.toml",
    "chars": 579,
    "preview": "[package]\nname = \"cuda_check\"\nversion = \"0.0.0\"\nauthors = [\"Andrzej Janik <vosen@vosen.pl>\"]\nedition = \"2021\"\n\n[[bin]]\nn"
  },
  {
    "path": "cuda_check/src/main.rs",
    "chars": 107,
    "preview": "#[cfg(windows)]\nmod win;\n\n#[cfg(windows)]\nfn main() {\n    win::main()\n}\n\n#[cfg(not(windows))]\nfn main() {}\n"
  },
  {
    "path": "cuda_check/src/win.rs",
    "chars": 10902,
    "preview": "use bpaf::{construct, pure, Bpaf, Parser};\nuse owo_colors::{OwoColorize, Stream};\nuse rand::seq::SliceRandom;\nuse std::{"
  },
  {
    "path": "cuda_macros/.rustfmt.toml",
    "chars": 30,
    "preview": "disable_all_formatting = true\n"
  },
  {
    "path": "cuda_macros/Cargo.toml",
    "chars": 283,
    "preview": "[package]\nname = \"cuda_macros\"\nversion = \"0.0.0\"\nauthors = [\"Andrzej Janik <vosen@vosen.pl>\"]\nedition = \"2021\"\n\n[depende"
  },
  {
    "path": "cuda_macros/build/wrapper.h",
    "chars": 169,
    "preview": "#define __CUDA_API_VERSION_INTERNAL\n#include <cuda.h>\n#include <cudaProfiler.h>\n#include <cudaGL.h>\n#include <cudaEGL.h>"
  },
  {
    "path": "cuda_macros/src/cublas.rs",
    "chars": 245364,
    "preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\nextern \"system\" {\n    #[must_use"
  },
  {
    "path": "cuda_macros/src/cublaslt.rs",
    "chars": 30240,
    "preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\nextern \"system\" {\n    #[must_use"
  },
  {
    "path": "cuda_macros/src/cublaslt_internal.rs",
    "chars": 68573,
    "preview": "/* automatically generated by rust-bindgen 0.70.1 */\n\nextern \"system\" {\n    fn cublasLtShutdownCtx(param_1: ::core::ffi:"
  },
  {
    "path": "cuda_macros/src/cuda.rs",
    "chars": 917710,
    "preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\nextern \"system\" {\n    /** \\brief"
  },
  {
    "path": "cuda_macros/src/cudnn8.rs",
    "chars": 118378,
    "preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\nextern \"system\" {\n    fn cudnnGe"
  },
  {
    "path": "cuda_macros/src/cudnn9.rs",
    "chars": 94345,
    "preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\nextern \"system\" {\n    fn cudnnGe"
  },
  {
    "path": "cuda_macros/src/cufft.rs",
    "chars": 15917,
    "preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\nextern \"system\" {\n    fn cufftPl"
  },
  {
    "path": "cuda_macros/src/cusparse.rs",
    "chars": 238716,
    "preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\nextern \"system\" {\n    #[must_use"
  },
  {
    "path": "cuda_macros/src/lib.rs",
    "chars": 12261,
    "preview": "extern crate proc_macro;\n\nuse proc_macro::TokenStream;\nuse proc_macro2::Span;\nuse quote::{format_ident, quote, ToTokens}"
  },
  {
    "path": "cuda_macros/src/nvml.rs",
    "chars": 517441,
    "preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\nextern \"system\" {\n    #[must_use"
  },
  {
    "path": "cuda_types/.rustfmt.toml",
    "chars": 30,
    "preview": "disable_all_formatting = true\n"
  },
  {
    "path": "cuda_types/Cargo.toml",
    "chars": 514,
    "preview": "[package]\nname = \"cuda_types\"\nversion = \"0.0.0\"\nauthors = [\"Andrzej Janik <vosen@vosen.pl>\"]\nedition = \"2021\"\n\n[dependen"
  },
  {
    "path": "cuda_types/src/cublas.rs",
    "chars": 14296,
    "preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\npub type __half = u16;\npub type "
  },
  {
    "path": "cuda_types/src/cublaslt.rs",
    "chars": 162732,
    "preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\npub type __half = u16;\npub type "
  },
  {
    "path": "cuda_types/src/cuda.rs",
    "chars": 381613,
    "preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\npub const CUDA_VERSION: u32 = 13"
  },
  {
    "path": "cuda_types/src/cudnn.rs",
    "chars": 48985,
    "preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\n#[repr(C)]\n#[derive(Debug, Copy,"
  },
  {
    "path": "cuda_types/src/cudnn8.rs",
    "chars": 25888,
    "preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\npub type __half = u16;\npub type "
  },
  {
    "path": "cuda_types/src/cudnn9.rs",
    "chars": 97036,
    "preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\npub type __half = u16;\npub type "
  },
  {
    "path": "cuda_types/src/cufft.rs",
    "chars": 17377,
    "preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\npub type __half = u16;\npub type "
  },
  {
    "path": "cuda_types/src/cusparse.rs",
    "chars": 21285,
    "preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\npub type __half = u16;\npub type "
  },
  {
    "path": "cuda_types/src/dark_api.rs",
    "chars": 2623,
    "preview": "use bitflags::bitflags;\nuse std::ffi::{c_uint, c_ulonglong, c_ushort, c_void};\n\n/*\nfat_cubin:\ntypedef struct {\n  int mag"
  },
  {
    "path": "cuda_types/src/lib.rs",
    "chars": 178,
    "preview": "pub enum FILE {}\n\npub mod cublas;\npub mod cublaslt;\npub mod cuda;\npub mod cudnn;\npub mod cudnn8;\npub mod cudnn9;\npub mod"
  },
  {
    "path": "cuda_types/src/nvml.rs",
    "chars": 237063,
    "preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\npub type __half = u16;\npub type "
  },
  {
    "path": "dark_api/Cargo.toml",
    "chars": 283,
    "preview": "[package]\nname = \"dark_api\"\nversion = \"0.0.0\"\nedition = \"2021\"\n\n[dependencies]\ncuda_types = { path = \"../cuda_types\" }\nf"
  },
  {
    "path": "dark_api/src/fatbin.rs",
    "chars": 9978,
    "preview": "// This file contains a higher-level interface for parsing fatbins\n\nuse std::{borrow::Cow, ptr};\n\nuse cuda_types::dark_a"
  },
  {
    "path": "dark_api/src/lib.rs",
    "chars": 26570,
    "preview": "use std::ffi::c_void;\n\nuse cuda_types::cuda::CUuuid;\n\npub mod fatbin;\n\nmacro_rules! dark_api_init {\n    (SIZE_OF, $table"
  },
  {
    "path": "detours-sys/Cargo.toml",
    "chars": 801,
    "preview": "[package]\nname = \"detours-sys\"\nversion = \"0.1.2\"\nauthors = [\"Diana <5275194+DianaNites@users.noreply.github.com>\"]\nediti"
  },
  {
    "path": "detours-sys/LICENSE-APACHE",
    "chars": 11358,
    "preview": "\n                                 Apache License\n                           Version 2.0, January 2004\n                  "
  },
  {
    "path": "detours-sys/LICENSE-MIT",
    "chars": 1045,
    "preview": "Copyright 2019 Diana\n\nPermission is hereby granted, free of charge, to any person obtaining a copy of this software and "
  },
  {
    "path": "detours-sys/README.md",
    "chars": 1083,
    "preview": "# Detours-sys\n\n[![Crates.io](https://img.shields.io/crates/v/detours-sys.svg)](https://crates.io/crates/detours-sys)\n![m"
  },
  {
    "path": "detours-sys/build/wrapper.h",
    "chars": 42,
    "preview": "#include <windows.h>\n#include <detours.h>\n"
  },
  {
    "path": "detours-sys/build.rs",
    "chars": 1346,
    "preview": "#[cfg(not(target_os = \"windows\"))]\nfn main() {}\n\n#[cfg(target_os = \"windows\")]\nfn main() -> Result<(), Box<dyn std::erro"
  },
  {
    "path": "detours-sys/src/bundled_bindings.rs",
    "chars": 31903,
    "preview": "/* automatically generated by rust-bindgen 0.56.0 */\n\npub type wchar_t = ::std::os::raw::c_ushort;\npub type ULONG = ::st"
  },
  {
    "path": "detours-sys/src/lib.rs",
    "chars": 2348,
    "preview": "#![cfg(target_os = \"windows\")]\n\n//! Bindings to the Microsoft Detours API.\n#![allow(non_camel_case_types)]\n#![allow(non_"
  },
  {
    "path": "docs/.gitignore",
    "chars": 5,
    "preview": "book\n"
  },
  {
    "path": "docs/.readthedocs.yaml",
    "chars": 237,
    "preview": "version: 2\nbuild:\n  os: ubuntu-lts-latest\n  tools:\n    rust: latest\n  jobs:\n    install:\n      - cargo install mdbook@0."
  },
  {
    "path": "docs/book.toml",
    "chars": 79,
    "preview": "[book]\nauthors = [\"Andrzej Janik\"]\nlanguage = \"en\"\nsrc = \"src\"\ntitle = \"ZLUDA\"\n"
  },
  {
    "path": "docs/src/SUMMARY.md",
    "chars": 335,
    "preview": "# Summary\n\n# General\n- [Quick start](./quick_start.md)\n- [Installing HIP SDK](./hip_sdk.md)\n- [FAQ](./faq.md)\n\n# Trouble"
  },
  {
    "path": "docs/src/building.md",
    "chars": 614,
    "preview": "# Building\n\n## Dependencies\n\n* Git\n* CMake\n* Python 3\n* Rust compiler (recent version)\n* C++ compiler\n* (Linux only) HIP"
  },
  {
    "path": "docs/src/faq.md",
    "chars": 4275,
    "preview": "# FAQ\n\n> [!WARNING]\n> For legal reasons we can't help you with the pre-rollback versions (older than 4). See more here: "
  },
  {
    "path": "docs/src/hip_sdk.md",
    "chars": 3158,
    "preview": "# Installing HIP SDK\n\nOn Windows, in addition to installing the GPU driver, you need to install the HIP SDK. Choose one "
  },
  {
    "path": "docs/src/llama_cpp.md",
    "chars": 472,
    "preview": "# llama.cpp\n\nllama.cpp runs at native speed when compiled for CUDA architecture 86 and with cuBLAS enabled:\n```\ncmake -B"
  },
  {
    "path": "docs/src/precompiling.md",
    "chars": 738,
    "preview": "# Precompiling\n\nConsider precompiling the GPU code with `zluda_precompile` if you are trying to run a large application."
  },
  {
    "path": "docs/src/quick_start.md",
    "chars": 1799,
    "preview": "# Quick start\n\n> [!WARNING]\n> This version of ZLUDA is under heavy development and will likely not work with your applic"
  },
  {
    "path": "docs/src/troubleshooting.md",
    "chars": 8600,
    "preview": "# Logging\n\n## Introduction\n\nzluda_trace is a [shim](https://en.wikipedia.org/wiki/Shim_(computing))\nfor the CUDA API whi"
  },
  {
    "path": "ext/detours/.github/ISSUE_TEMPLATE/bug-report.md",
    "chars": 1990,
    "preview": "---\r\nname: Bug Report\r\nabout: Report a bug in Detours\r\ntitle: \"<header>: Problem\"\r\nlabels: 'bug'\r\nassignees: ''\r\n\r\n---\r\n"
  },
  {
    "path": "ext/detours/.github/ISSUE_TEMPLATE/question.md",
    "chars": 1382,
    "preview": "---\r\nname: Question\r\nabout: Ask a question about Detours\r\ntitle: \"\"\r\nlabels: question\r\nassignees: ''\r\n\r\n---\r\n\r\nInstructi"
  },
  {
    "path": "ext/detours/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md",
    "chars": 493,
    "preview": "<!--\r\nBefore submitting a pull request, please ensure that:\r\n\r\n* These changes introduce no known API breaks (changing t"
  },
  {
    "path": "ext/detours/.github/codeql/codeql-config.yml",
    "chars": 108,
    "preview": "---\r\nname: \"Detours CodeQL Config\"\r\n\r\nqueries:\r\n - uses: security-and-quality\r\n - uses: security-extended \r\n"
  },
  {
    "path": "ext/detours/.github/workflows/main.yml",
    "chars": 2466,
    "preview": "name: CI-Build\n\nenv:\n  # Turn on msvc analysis during build, enable once warnings are clean.\n  DETOURS_ANALYZE: true\n\n  "
  },
  {
    "path": "ext/detours/.gitignore",
    "chars": 424,
    "preview": "# C extensions\r\n*.so\r\n\r\n# Unit test / coverage reports\r\n.coverage\r\n.tox\r\nnosetests.xml\r\n\r\n# Translations\r\n*.mo\r\n\r\n# Mr D"
  },
  {
    "path": "ext/detours/CREDITS.TXT",
    "chars": 4987,
    "preview": "==============================================================================\nThe following individuals have helped ide"
  },
  {
    "path": "ext/detours/LICENSE.md",
    "chars": 1099,
    "preview": "# Copyright (c) Microsoft Corporation\n\nAll rights reserved.\n\n# MIT License\n\nPermission is hereby granted, free of charge"
  },
  {
    "path": "ext/detours/Makefile",
    "chars": 2065,
    "preview": "##############################################################################\n##\n##  Makefile for Detours.\n##\n##  Micro"
  },
  {
    "path": "ext/detours/README.md",
    "chars": 3177,
    "preview": "# Microsoft Research Detours Package\n\nDetours is a software package for monitoring and instrumenting API calls on Window"
  },
  {
    "path": "ext/detours/samples/Makefile",
    "chars": 9833,
    "preview": "##############################################################################\n##\n##  Makefile for Detours Test Programs"
  },
  {
    "path": "ext/detours/samples/README.TXT",
    "chars": 2614,
    "preview": "##############################################################################\n##\n##  Samples README File\n##\n##  Microso"
  },
  {
    "path": "ext/detours/samples/comeasy/Makefile",
    "chars": 3913,
    "preview": "##############################################################################\n##\n##  API Extension to Measure time slep"
  },
  {
    "path": "ext/detours/samples/comeasy/comeasy.cpp",
    "chars": 1450,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detour Test Program (comeasy.cpp o"
  },
  {
    "path": "ext/detours/samples/comeasy/wrotei.cpp",
    "chars": 5104,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detour Test Program (wrotei.cpp of"
  },
  {
    "path": "ext/detours/samples/comeasy/wrotei.rc",
    "chars": 560,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Version information for wrotei.rc."
  },
  {
    "path": "ext/detours/samples/commem/Makefile",
    "chars": 1198,
    "preview": "##############################################################################\n##\n##  Makefile for Detours Test Programs"
  },
  {
    "path": "ext/detours/samples/commem/commem.cpp",
    "chars": 3243,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detour functions of a COM interfac"
  },
  {
    "path": "ext/detours/samples/common.mak",
    "chars": 2015,
    "preview": "##############################################################################\n##\n##  Common makefile for Detours test p"
  },
  {
    "path": "ext/detours/samples/cping/Makefile",
    "chars": 3424,
    "preview": "##############################################################################\n##\n##  Makefile for Detours Test Programs"
  },
  {
    "path": "ext/detours/samples/cping/ReadMe.Txt",
    "chars": 1524,
    "preview": "Microsoft Research Detours Package\n==============================================================================\n4/2/98"
  },
  {
    "path": "ext/detours/samples/cping/cping.cpp",
    "chars": 65027,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Module: cping.cpp (cping.exe)\n//\n/"
  },
  {
    "path": "ext/detours/samples/cping/iping.idl",
    "chars": 681,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Module: iping.idl (cping.exe - COM"
  },
  {
    "path": "ext/detours/samples/disas/Makefile",
    "chars": 2081,
    "preview": "##############################################################################\n##\n##  Makefile for Detours Test Programs"
  },
  {
    "path": "ext/detours/samples/disas/arm.asm",
    "chars": 6759,
    "preview": ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n;;\n;;  Detours Test Program (rlo.asm/disas"
  },
  {
    "path": "ext/detours/samples/disas/disas.cpp",
    "chars": 20476,
    "preview": "/////////////////////////////////////////////////////////////////////////////\n//\n//  Module: disas.cpp (disas.exe - Deto"
  },
  {
    "path": "ext/detours/samples/disas/ia64.asm",
    "chars": 28061,
    "preview": "/////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (ia64.asm/disa"
  },
  {
    "path": "ext/detours/samples/disas/unk.cpp",
    "chars": 309,
    "preview": "/////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (x86.asm of di"
  },
  {
    "path": "ext/detours/samples/disas/x64.asm",
    "chars": 16917,
    "preview": ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n;;\n;;  Detours Test Program (x64.asm/disas"
  },
  {
    "path": "ext/detours/samples/disas/x86.cpp",
    "chars": 5082,
    "preview": "/////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (x86.asm of di"
  },
  {
    "path": "ext/detours/samples/dtest/Makefile",
    "chars": 3003,
    "preview": "##############################################################################\n##\n##  Makefile for Detours Test Programs"
  },
  {
    "path": "ext/detours/samples/dtest/NORMAL_IA64.TXT",
    "chars": 5329,
    "preview": "    ..\\..\\bin.IA64\\dtest.exe\nCalling LocalTarget1 w/o detour\n  LocalTarget1 (1)\nCalling LocalTarget1 w/ detour\n  MyLocal"
  },
  {
    "path": "ext/detours/samples/dtest/NORMAL_X64.TXT",
    "chars": 5328,
    "preview": "    ..\\..\\bin.X64\\dtest.exe\nCalling LocalTarget1 w/o detour\n  LocalTarget1 (1)\nCalling LocalTarget1 w/ detour\n  MyLocalT"
  },
  {
    "path": "ext/detours/samples/dtest/NORMAL_X86.TXT",
    "chars": 5328,
    "preview": "    ..\\..\\bin.X86\\dtest.exe\nCalling LocalTarget1 w/o detour\n  LocalTarget1 (1)\nCalling LocalTarget1 w/ detour\n  MyLocalT"
  },
  {
    "path": "ext/detours/samples/dtest/dtarge.cpp",
    "chars": 11010,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (dtarge.dll)\n"
  },
  {
    "path": "ext/detours/samples/dtest/dtarge.h",
    "chars": 3486,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (dtarge.h of "
  },
  {
    "path": "ext/detours/samples/dtest/dtarge.rc",
    "chars": 556,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Version information for dtarge.rc."
  },
  {
    "path": "ext/detours/samples/dtest/dtest.cpp",
    "chars": 26200,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (dtest.cpp of"
  },
  {
    "path": "ext/detours/samples/dumpe/Makefile",
    "chars": 1329,
    "preview": "##############################################################################\n##\n##  Makefile for Detours Test Programs"
  },
  {
    "path": "ext/detours/samples/dumpe/dumpe.cpp",
    "chars": 3294,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (dumpe.cpp of"
  },
  {
    "path": "ext/detours/samples/dumpi/Makefile",
    "chars": 1204,
    "preview": "##############################################################################\n##\n##  Makefile for Detours Test Programs"
  },
  {
    "path": "ext/detours/samples/dumpi/dumpi.cpp",
    "chars": 7575,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (dumpi.cpp of"
  },
  {
    "path": "ext/detours/samples/dynamic_alloc/Makefile",
    "chars": 1814,
    "preview": "##############################################################################\n##\n##  Makefile for Detours Test Programs"
  },
  {
    "path": "ext/detours/samples/dynamic_alloc/main.cpp",
    "chars": 5738,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program\n//\n//  Micros"
  },
  {
    "path": "ext/detours/samples/dynamic_alloc/x64.asm",
    "chars": 403,
    "preview": ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n;;\n;;  Detours Test Program\n;;\n;;  Microso"
  },
  {
    "path": "ext/detours/samples/dynamic_alloc/x86.asm",
    "chars": 433,
    "preview": ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n;;\n;;  Detours Test Program\n;;\n;;  Microso"
  },
  {
    "path": "ext/detours/samples/echo/Makefile",
    "chars": 3259,
    "preview": "##############################################################################\n##\n##  Detours Test Program\n##\n##  Micros"
  },
  {
    "path": "ext/detours/samples/echo/echofx.cpp",
    "chars": 1538,
    "preview": "//\n//\n//\n#include <windows.h>\n#include <detours.h>\n#include <stdio.h>\n\nint WINAPI Echo(PCSTR pszMsg);\n\nstatic int (WINAP"
  },
  {
    "path": "ext/detours/samples/echo/echofx.rc",
    "chars": 569,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Version information for echofx.rc."
  },
  {
    "path": "ext/detours/samples/echo/echonul.cpp",
    "chars": 187,
    "preview": "//\n//\n//\n#include <windows.h>\n\nint WINAPI Echo(PCSTR pszMsg)\n{\n    int sum = 0;\n    while (*pszMsg) {\n        sum = sum "
  },
  {
    "path": "ext/detours/samples/echo/main.cpp",
    "chars": 459,
    "preview": "//\n//\n//\n#include <windows.h>\n\nint WINAPI Echo(PCSTR pszMsg);\n\nextern \"C\" int __stdcall mainCRTStartup(HINSTANCE hInstan"
  },
  {
    "path": "ext/detours/samples/einst/Makefile",
    "chars": 4813,
    "preview": "##############################################################################\n##\n##  Makefile for Detours Test Programs"
  },
  {
    "path": "ext/detours/samples/einst/edll1x.cpp",
    "chars": 1312,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (edll1x.cpp o"
  },
  {
    "path": "ext/detours/samples/einst/edll2x.cpp",
    "chars": 1323,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (edll2x.cpp o"
  },
  {
    "path": "ext/detours/samples/einst/edll3x.cpp",
    "chars": 1910,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (edll3x.cpp o"
  },
  {
    "path": "ext/detours/samples/einst/einst.cpp",
    "chars": 2375,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (einst.cpp of"
  },
  {
    "path": "ext/detours/samples/excep/Makefile",
    "chars": 1242,
    "preview": "##############################################################################\n##\n##  Makefile for Detours Test Programs"
  },
  {
    "path": "ext/detours/samples/excep/excep.cpp",
    "chars": 3824,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  First Chance Exception Handling Te"
  },
  {
    "path": "ext/detours/samples/excep/firstexc.cpp",
    "chars": 6642,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (firstexc.cpp"
  },
  {
    "path": "ext/detours/samples/excep/firstexc.h",
    "chars": 609,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (firstexc.h o"
  },
  {
    "path": "ext/detours/samples/findfunc/Makefile",
    "chars": 6218,
    "preview": "##############################################################################\n##\n##  Program to test DetourFindFunction"
  },
  {
    "path": "ext/detours/samples/findfunc/extend.cpp",
    "chars": 5015,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detour Test Program (extend.cpp of"
  },
  {
    "path": "ext/detours/samples/findfunc/extend.rc",
    "chars": 577,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Version information for extend.rc."
  },
  {
    "path": "ext/detours/samples/findfunc/findfunc.cpp",
    "chars": 955,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detour Test Program (findfunc.cpp "
  },
  {
    "path": "ext/detours/samples/findfunc/symtest.cpp",
    "chars": 13459,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detour Test Program (symtest.cpp o"
  },
  {
    "path": "ext/detours/samples/findfunc/target.cpp",
    "chars": 1044,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detour Test Program (target.cpp of"
  },
  {
    "path": "ext/detours/samples/findfunc/target.h",
    "chars": 375,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detour Test Program (target.h of t"
  },
  {
    "path": "ext/detours/samples/findfunc/target.rc",
    "chars": 556,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Version information for target.rc."
  },
  {
    "path": "ext/detours/samples/impmunge/Makefile",
    "chars": 1709,
    "preview": "##############################################################################\n##\n##  Makefile for Detours Test Programs"
  },
  {
    "path": "ext/detours/samples/impmunge/impmunge.cpp",
    "chars": 13492,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (impmunge.cpp"
  },
  {
    "path": "ext/detours/samples/member/Makefile",
    "chars": 1176,
    "preview": "##############################################################################\n##\n##  Makefile for Detours Test Programs"
  },
  {
    "path": "ext/detours/samples/member/member.cpp",
    "chars": 3777,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Test a detour of a member function"
  },
  {
    "path": "ext/detours/samples/opengl/Makefile",
    "chars": 3710,
    "preview": "######################################################################\n##\n##  Hook test for glFinish\n##\n\n!include ..\\com"
  },
  {
    "path": "ext/detours/samples/opengl/ogldet.cpp",
    "chars": 2053,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Module:     ogldet.dll\n//\n//  This"
  },
  {
    "path": "ext/detours/samples/opengl/ogldet.rc",
    "chars": 564,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Version information for ogldet.rc."
  },
  {
    "path": "ext/detours/samples/opengl/testogl.cpp",
    "chars": 470,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  File:       testogl.cpp\n//  Module"
  },
  {
    "path": "ext/detours/samples/region/Makefile",
    "chars": 1176,
    "preview": "##############################################################################\n##\n##  Makefile for Detours Test Programs"
  },
  {
    "path": "ext/detours/samples/region/region.cpp",
    "chars": 2926,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Test the different system region b"
  },
  {
    "path": "ext/detours/samples/setdll/Makefile",
    "chars": 1916,
    "preview": "##############################################################################\n##\n##  Makefile for Detours Test Programs"
  },
  {
    "path": "ext/detours/samples/setdll/setdll.cpp",
    "chars": 9684,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (setdll.cpp o"
  },
  {
    "path": "ext/detours/samples/simple/Makefile",
    "chars": 3885,
    "preview": "##############################################################################\n##\n##  API Extention to Measure time slep"
  },
  {
    "path": "ext/detours/samples/simple/simple.cpp",
    "chars": 2322,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (simple.cpp o"
  },
  {
    "path": "ext/detours/samples/simple/simple.rc",
    "chars": 556,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Version information for simple.rc."
  },
  {
    "path": "ext/detours/samples/simple/sleep5.cpp",
    "chars": 643,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (sleep5.cpp o"
  },
  {
    "path": "ext/detours/samples/slept/Makefile",
    "chars": 7026,
    "preview": "##############################################################################\n##\n##  API Extension to Measure time slep"
  },
  {
    "path": "ext/detours/samples/slept/NORMAL_IA64.TXT",
    "chars": 8721,
    "preview": "-------- Reseting test binaries to initial state. -----------------------\n    ..\\..\\bin.IA64\\setdll.exe -r ..\\..\\bin.IA6"
  },
  {
    "path": "ext/detours/samples/slept/NORMAL_X64.TXT",
    "chars": 8248,
    "preview": "-------- Reseting test binaries to initial state. -----------------------\n    ..\\..\\bin.X64\\setdll.exe -r ..\\..\\bin.X64\\"
  },
  {
    "path": "ext/detours/samples/slept/NORMAL_X86.TXT",
    "chars": 6930,
    "preview": "-------- Reseting test binaries to initial state. -----------------------\n    ..\\..\\bin.X86\\setdll.exe -r ..\\..\\bin.X86\\"
  },
  {
    "path": "ext/detours/samples/slept/dslept.cpp",
    "chars": 3992,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detour Test Program (dslept.cpp of"
  },
  {
    "path": "ext/detours/samples/slept/dslept.rc",
    "chars": 570,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Version information for dslept.rc."
  },
  {
    "path": "ext/detours/samples/slept/sleepbed.cpp",
    "chars": 2803,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detour Test Program (sleepbed.cpp "
  },
  {
    "path": "ext/detours/samples/slept/sleepnew.cpp",
    "chars": 2366,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detour Test Program (sleepnew.cpp "
  },
  {
    "path": "ext/detours/samples/slept/sleepold.cpp",
    "chars": 1767,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detour Test Program (sleepold.cpp "
  },
  {
    "path": "ext/detours/samples/slept/slept.cpp",
    "chars": 3330,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detour Test Program (slept.cpp of "
  },
  {
    "path": "ext/detours/samples/slept/slept.h",
    "chars": 572,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detour Test Program (slept.h of sl"
  },
  {
    "path": "ext/detours/samples/slept/slept.rc",
    "chars": 559,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Version information for sleep.rc.\n"
  },
  {
    "path": "ext/detours/samples/slept/verify.cpp",
    "chars": 1919,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detour Test Program (verify.cpp)\n/"
  },
  {
    "path": "ext/detours/samples/syelog/Makefile",
    "chars": 2990,
    "preview": "##############################################################################\n##\n##  Makefile for Detours.\n##\n##  Micro"
  },
  {
    "path": "ext/detours/samples/syelog/sltest.cpp",
    "chars": 4408,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (sltest.cpp o"
  },
  {
    "path": "ext/detours/samples/syelog/sltestp.cpp",
    "chars": 3357,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (sltestp.cpp "
  },
  {
    "path": "ext/detours/samples/syelog/syelog.cpp",
    "chars": 29209,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (syelog.cpp o"
  },
  {
    "path": "ext/detours/samples/syelog/syelog.h",
    "chars": 3534,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (syelog.h of "
  },
  {
    "path": "ext/detours/samples/syelog/syelogd.cpp",
    "chars": 17253,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (syelogd.cpp "
  },
  {
    "path": "ext/detours/samples/talloc/Makefile",
    "chars": 12487,
    "preview": "##############################################################################\n##\n##  Makefile for Detours Test Programs"
  },
  {
    "path": "ext/detours/samples/talloc/NORMAL_IA64.TXT",
    "chars": 3549,
    "preview": "talloc.exe: Detoured functions.\n\n       Address         Size: Typ Sta Prot Ini : Contents\n  ------------ ------------: -"
  },
  {
    "path": "ext/detours/samples/talloc/NORMAL_X64.TXT",
    "chars": 3353,
    "preview": "talloc.exe: Detoured functions.\n\n       Address         Size: Typ Sta Prot Ini : Contents\n  ------------ ------------: -"
  },
  {
    "path": "ext/detours/samples/talloc/talloc.cpp",
    "chars": 16295,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (talloc.cpp o"
  },
  {
    "path": "ext/detours/samples/talloc/tdll1x.cpp",
    "chars": 525,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (tdll1x.cpp o"
  },
  {
    "path": "ext/detours/samples/talloc/tdll2x.cpp",
    "chars": 525,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (tdll2x.cpp o"
  },
  {
    "path": "ext/detours/samples/talloc/tdll3x.cpp",
    "chars": 525,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (tdll3x.cpp o"
  },
  {
    "path": "ext/detours/samples/talloc/tdll4x.cpp",
    "chars": 525,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (tdll4x.cpp o"
  },
  {
    "path": "ext/detours/samples/talloc/tdll5x.cpp",
    "chars": 525,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (tdll5x.cpp o"
  },
  {
    "path": "ext/detours/samples/talloc/tdll6x.cpp",
    "chars": 525,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (tdll6x.cpp o"
  },
  {
    "path": "ext/detours/samples/talloc/tdll7x.cpp",
    "chars": 525,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (tdll7x.cpp o"
  },
  {
    "path": "ext/detours/samples/talloc/tdll8x.cpp",
    "chars": 525,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (tdll8x.cpp o"
  },
  {
    "path": "ext/detours/samples/talloc/tdll9x.cpp",
    "chars": 525,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (tdll9x.cpp o"
  },
  {
    "path": "ext/detours/samples/traceapi/Makefile",
    "chars": 5555,
    "preview": "##############################################################################\n##\n##  Utility to trace Win32 APIs.\n##\n##"
  },
  {
    "path": "ext/detours/samples/traceapi/_win32.cpp",
    "chars": 1057533,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (_win32.cpp o"
  },
  {
    "path": "ext/detours/samples/traceapi/testapi.cpp",
    "chars": 2017,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (testapi.cpp "
  },
  {
    "path": "ext/detours/samples/traceapi/trcapi.cpp",
    "chars": 14280,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (trcapi.cpp o"
  },
  {
    "path": "ext/detours/samples/traceapi/trcapi.rc",
    "chars": 569,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Version information for trcapi.rc."
  },
  {
    "path": "ext/detours/samples/tracebld/Makefile",
    "chars": 4893,
    "preview": "##############################################################################\n##\n##  Utility to registry and file acces"
  },
  {
    "path": "ext/detours/samples/tracebld/tracebld.cpp",
    "chars": 17992,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (tracebld.cpp"
  },
  {
    "path": "ext/detours/samples/tracebld/tracebld.h",
    "chars": 1654,
    "preview": "//////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (tracebld.h o"
  },
  {
    "path": "ext/detours/samples/tracebld/trcbld.cpp",
    "chars": 132100,
    "preview": "/////////////////////////////////////////////////////////////////////////////\n//\n//  Detours Test Program (trcbld.cpp of"
  }
]

// ... and 671 more files (download for full content)

About this extraction

This page contains the full source code of the vosen/ZLUDA GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 871 files (17.8 MB), approximately 4.1M tokens. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!