Copy disabled (too large)
Download .txt
Showing preview only (16,366K chars total). Download the full file to get everything.
Repository: vosen/ZLUDA
Branch: master
Commit: dcc6bb8fdad2
Files: 871
Total size: 17.8 MB
Directory structure:
gitextract_ehz427hu/
├── .cargo/
│ └── config.toml
├── .devcontainer/
│ ├── Dockerfile
│ └── devcontainer.json
├── .git-blame-ignore-revs
├── .gitattributes
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── config.yml
│ │ └── zluda_dump.yml
│ └── workflows/
│ ├── move_tests.sh
│ ├── nightly_tests.yml
│ ├── pr_master.yml
│ ├── push_master.yml
│ ├── rocm_setup_build.sh
│ ├── rocm_setup_run.sh
│ └── trigger_nightly_tests.yml
├── .gitignore
├── .gitmodules
├── .rustfmt.toml
├── Cargo.toml
├── LICENSE-APACHE
├── LICENSE-MIT
├── README.md
├── compiler/
│ ├── Cargo.toml
│ └── src/
│ ├── error.rs
│ └── main.rs
├── cuda_check/
│ ├── Cargo.toml
│ └── src/
│ ├── main.rs
│ └── win.rs
├── cuda_macros/
│ ├── .rustfmt.toml
│ ├── Cargo.toml
│ ├── build/
│ │ └── wrapper.h
│ └── src/
│ ├── cublas.rs
│ ├── cublaslt.rs
│ ├── cublaslt_internal.rs
│ ├── cuda.rs
│ ├── cudnn8.rs
│ ├── cudnn9.rs
│ ├── cufft.rs
│ ├── cusparse.rs
│ ├── lib.rs
│ └── nvml.rs
├── cuda_types/
│ ├── .rustfmt.toml
│ ├── Cargo.toml
│ └── src/
│ ├── cublas.rs
│ ├── cublaslt.rs
│ ├── cuda.rs
│ ├── cudnn.rs
│ ├── cudnn8.rs
│ ├── cudnn9.rs
│ ├── cufft.rs
│ ├── cusparse.rs
│ ├── dark_api.rs
│ ├── lib.rs
│ └── nvml.rs
├── dark_api/
│ ├── Cargo.toml
│ └── src/
│ ├── fatbin.rs
│ └── lib.rs
├── detours-sys/
│ ├── Cargo.toml
│ ├── LICENSE-APACHE
│ ├── LICENSE-MIT
│ ├── README.md
│ ├── build/
│ │ └── wrapper.h
│ ├── build.rs
│ └── src/
│ ├── bundled_bindings.rs
│ └── lib.rs
├── docs/
│ ├── .gitignore
│ ├── .readthedocs.yaml
│ ├── book.toml
│ └── src/
│ ├── SUMMARY.md
│ ├── building.md
│ ├── faq.md
│ ├── hip_sdk.md
│ ├── llama_cpp.md
│ ├── precompiling.md
│ ├── quick_start.md
│ └── troubleshooting.md
├── ext/
│ ├── detours/
│ │ ├── .github/
│ │ │ ├── ISSUE_TEMPLATE/
│ │ │ │ ├── bug-report.md
│ │ │ │ └── question.md
│ │ │ ├── PULL_REQUEST_TEMPLATE/
│ │ │ │ └── pull_request_template.md
│ │ │ ├── codeql/
│ │ │ │ └── codeql-config.yml
│ │ │ └── workflows/
│ │ │ └── main.yml
│ │ ├── .gitignore
│ │ ├── CREDITS.TXT
│ │ ├── LICENSE.md
│ │ ├── Makefile
│ │ ├── README.md
│ │ ├── samples/
│ │ │ ├── Makefile
│ │ │ ├── README.TXT
│ │ │ ├── comeasy/
│ │ │ │ ├── Makefile
│ │ │ │ ├── comeasy.cpp
│ │ │ │ ├── wrotei.cpp
│ │ │ │ └── wrotei.rc
│ │ │ ├── commem/
│ │ │ │ ├── Makefile
│ │ │ │ └── commem.cpp
│ │ │ ├── common.mak
│ │ │ ├── cping/
│ │ │ │ ├── Makefile
│ │ │ │ ├── ReadMe.Txt
│ │ │ │ ├── cping.cpp
│ │ │ │ └── iping.idl
│ │ │ ├── disas/
│ │ │ │ ├── Makefile
│ │ │ │ ├── arm.asm
│ │ │ │ ├── disas.cpp
│ │ │ │ ├── ia64.asm
│ │ │ │ ├── unk.cpp
│ │ │ │ ├── x64.asm
│ │ │ │ └── x86.cpp
│ │ │ ├── dtest/
│ │ │ │ ├── Makefile
│ │ │ │ ├── NORMAL_IA64.TXT
│ │ │ │ ├── NORMAL_X64.TXT
│ │ │ │ ├── NORMAL_X86.TXT
│ │ │ │ ├── dtarge.cpp
│ │ │ │ ├── dtarge.h
│ │ │ │ ├── dtarge.rc
│ │ │ │ └── dtest.cpp
│ │ │ ├── dumpe/
│ │ │ │ ├── Makefile
│ │ │ │ └── dumpe.cpp
│ │ │ ├── dumpi/
│ │ │ │ ├── Makefile
│ │ │ │ └── dumpi.cpp
│ │ │ ├── dynamic_alloc/
│ │ │ │ ├── Makefile
│ │ │ │ ├── main.cpp
│ │ │ │ ├── x64.asm
│ │ │ │ └── x86.asm
│ │ │ ├── echo/
│ │ │ │ ├── Makefile
│ │ │ │ ├── echofx.cpp
│ │ │ │ ├── echofx.rc
│ │ │ │ ├── echonul.cpp
│ │ │ │ └── main.cpp
│ │ │ ├── einst/
│ │ │ │ ├── Makefile
│ │ │ │ ├── edll1x.cpp
│ │ │ │ ├── edll2x.cpp
│ │ │ │ ├── edll3x.cpp
│ │ │ │ └── einst.cpp
│ │ │ ├── excep/
│ │ │ │ ├── Makefile
│ │ │ │ ├── excep.cpp
│ │ │ │ ├── firstexc.cpp
│ │ │ │ └── firstexc.h
│ │ │ ├── findfunc/
│ │ │ │ ├── Makefile
│ │ │ │ ├── extend.cpp
│ │ │ │ ├── extend.rc
│ │ │ │ ├── findfunc.cpp
│ │ │ │ ├── symtest.cpp
│ │ │ │ ├── target.cpp
│ │ │ │ ├── target.h
│ │ │ │ └── target.rc
│ │ │ ├── impmunge/
│ │ │ │ ├── Makefile
│ │ │ │ └── impmunge.cpp
│ │ │ ├── member/
│ │ │ │ ├── Makefile
│ │ │ │ └── member.cpp
│ │ │ ├── opengl/
│ │ │ │ ├── Makefile
│ │ │ │ ├── ogldet.cpp
│ │ │ │ ├── ogldet.rc
│ │ │ │ └── testogl.cpp
│ │ │ ├── region/
│ │ │ │ ├── Makefile
│ │ │ │ └── region.cpp
│ │ │ ├── setdll/
│ │ │ │ ├── Makefile
│ │ │ │ └── setdll.cpp
│ │ │ ├── simple/
│ │ │ │ ├── Makefile
│ │ │ │ ├── simple.cpp
│ │ │ │ ├── simple.rc
│ │ │ │ └── sleep5.cpp
│ │ │ ├── slept/
│ │ │ │ ├── Makefile
│ │ │ │ ├── NORMAL_IA64.TXT
│ │ │ │ ├── NORMAL_X64.TXT
│ │ │ │ ├── NORMAL_X86.TXT
│ │ │ │ ├── dslept.cpp
│ │ │ │ ├── dslept.rc
│ │ │ │ ├── sleepbed.cpp
│ │ │ │ ├── sleepnew.cpp
│ │ │ │ ├── sleepold.cpp
│ │ │ │ ├── slept.cpp
│ │ │ │ ├── slept.h
│ │ │ │ ├── slept.rc
│ │ │ │ └── verify.cpp
│ │ │ ├── syelog/
│ │ │ │ ├── Makefile
│ │ │ │ ├── sltest.cpp
│ │ │ │ ├── sltestp.cpp
│ │ │ │ ├── syelog.cpp
│ │ │ │ ├── syelog.h
│ │ │ │ └── syelogd.cpp
│ │ │ ├── talloc/
│ │ │ │ ├── Makefile
│ │ │ │ ├── NORMAL_IA64.TXT
│ │ │ │ ├── NORMAL_X64.TXT
│ │ │ │ ├── talloc.cpp
│ │ │ │ ├── tdll1x.cpp
│ │ │ │ ├── tdll2x.cpp
│ │ │ │ ├── tdll3x.cpp
│ │ │ │ ├── tdll4x.cpp
│ │ │ │ ├── tdll5x.cpp
│ │ │ │ ├── tdll6x.cpp
│ │ │ │ ├── tdll7x.cpp
│ │ │ │ ├── tdll8x.cpp
│ │ │ │ └── tdll9x.cpp
│ │ │ ├── traceapi/
│ │ │ │ ├── Makefile
│ │ │ │ ├── _win32.cpp
│ │ │ │ ├── testapi.cpp
│ │ │ │ ├── trcapi.cpp
│ │ │ │ └── trcapi.rc
│ │ │ ├── tracebld/
│ │ │ │ ├── Makefile
│ │ │ │ ├── tracebld.cpp
│ │ │ │ ├── tracebld.h
│ │ │ │ ├── trcbld.cpp
│ │ │ │ └── trcbld.rc
│ │ │ ├── tracelnk/
│ │ │ │ ├── Makefile
│ │ │ │ ├── trclnk.cpp
│ │ │ │ └── trclnk.rc
│ │ │ ├── tracemem/
│ │ │ │ ├── Makefile
│ │ │ │ ├── trcmem.cpp
│ │ │ │ └── trcmem.rc
│ │ │ ├── tracereg/
│ │ │ │ ├── Makefile
│ │ │ │ ├── trcreg.cpp
│ │ │ │ └── trcreg.rc
│ │ │ ├── traceser/
│ │ │ │ ├── Makefile
│ │ │ │ ├── trcser.cpp
│ │ │ │ └── trcser.rc
│ │ │ ├── tracessl/
│ │ │ │ ├── Makefile
│ │ │ │ ├── trcssl.cpp
│ │ │ │ └── trcssl.rc
│ │ │ ├── tracetcp/
│ │ │ │ ├── Makefile
│ │ │ │ ├── trctcp.cpp
│ │ │ │ └── trctcp.rc
│ │ │ ├── tryman/
│ │ │ │ ├── Makefile
│ │ │ │ ├── managed.cs
│ │ │ │ ├── size.cpp
│ │ │ │ ├── tryman.cpp
│ │ │ │ ├── tstman.cpp
│ │ │ │ └── tstman.rc
│ │ │ └── withdll/
│ │ │ ├── Makefile
│ │ │ └── withdll.cpp
│ │ ├── src/
│ │ │ ├── Makefile
│ │ │ ├── creatwth.cpp
│ │ │ ├── detours.cpp
│ │ │ ├── detours.h
│ │ │ ├── detver.h
│ │ │ ├── disasm.cpp
│ │ │ ├── disolarm.cpp
│ │ │ ├── disolarm64.cpp
│ │ │ ├── disolia64.cpp
│ │ │ ├── disolx64.cpp
│ │ │ ├── disolx86.cpp
│ │ │ ├── image.cpp
│ │ │ ├── modules.cpp
│ │ │ └── uimports.cpp
│ │ ├── system.mak
│ │ ├── tests/
│ │ │ ├── Makefile
│ │ │ ├── catch.hpp
│ │ │ ├── corruptor.cpp
│ │ │ ├── corruptor.h
│ │ │ ├── main.cpp
│ │ │ ├── test_image_api.cpp
│ │ │ └── test_module_api.cpp
│ │ └── vc/
│ │ ├── Detours.sln
│ │ ├── Detours.vcxproj
│ │ └── Detours.vcxproj.filters
│ ├── highs-sys/
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ ├── build.rs
│ │ ├── install-dependencies.sh
│ │ ├── src/
│ │ │ ├── c_bindings.rs
│ │ │ └── lib.rs
│ │ ├── tests/
│ │ │ ├── test_highs_call.rs
│ │ │ └── test_highs_functions.rs
│ │ └── wrapper.h
│ ├── hip_runtime-sys/
│ │ ├── .rustfmt.toml
│ │ ├── Cargo.toml
│ │ ├── build.rs
│ │ └── src/
│ │ └── lib.rs
│ ├── hipblaslt-sys/
│ │ ├── .rustfmt.toml
│ │ ├── Cargo.toml
│ │ ├── build.rs
│ │ └── src/
│ │ └── lib.rs
│ ├── miopen-sys/
│ │ ├── .rustfmt.toml
│ │ ├── Cargo.toml
│ │ ├── build.rs
│ │ └── src/
│ │ └── lib.rs
│ ├── rocblas-sys/
│ │ ├── .rustfmt.toml
│ │ ├── Cargo.toml
│ │ ├── build.rs
│ │ └── src/
│ │ └── lib.rs
│ ├── rocm_smi-sys/
│ │ ├── .rustfmt.toml
│ │ ├── Cargo.toml
│ │ ├── build.rs
│ │ └── src/
│ │ └── lib.rs
│ └── rocsparse-sys/
│ ├── .rustfmt.toml
│ ├── Cargo.toml
│ ├── build.rs
│ └── src/
│ └── lib.rs
├── format/
│ ├── .rustfmt.toml
│ ├── Cargo.toml
│ └── src/
│ ├── dark_api.rs
│ ├── dnn8.rs
│ ├── dnn9.rs
│ ├── format_generated.rs
│ ├── format_generated_blas.rs
│ ├── format_generated_blaslt.rs
│ ├── format_generated_blaslt_internal.rs
│ ├── format_generated_dnn8.rs
│ ├── format_generated_dnn9.rs
│ ├── format_generated_fft.rs
│ ├── format_generated_nvml.rs
│ ├── format_generated_sparse.rs
│ └── lib.rs
├── llvm_zluda/
│ ├── Cargo.toml
│ ├── build.rs
│ └── src/
│ ├── compile.rs
│ ├── device-libs/
│ │ ├── LICENSE.TXT
│ │ ├── README.md
│ │ ├── ockl.bc
│ │ └── ocml.bc
│ ├── ffi.rs
│ ├── lib.cpp
│ ├── lib.rs
│ └── utils.rs
├── ptx/
│ ├── Cargo.toml
│ ├── lib/
│ │ ├── zluda_ptx_impl.bc
│ │ └── zluda_ptx_impl.cpp
│ └── src/
│ ├── lib.rs
│ ├── pass/
│ │ ├── deparamize_functions.rs
│ │ ├── expand_operands.rs
│ │ ├── fix_special_registers.rs
│ │ ├── hoist_globals.rs
│ │ ├── insert_explicit_load_store.rs
│ │ ├── insert_implicit_conversions.rs
│ │ ├── insert_post_saturation.rs
│ │ ├── instruction_mode_to_global_mode/
│ │ │ ├── call_with_mode.ptx
│ │ │ ├── fold_denormal.ptx
│ │ │ ├── mod.rs
│ │ │ └── test.rs
│ │ ├── llvm/
│ │ │ ├── attributes.rs
│ │ │ ├── emit.rs
│ │ │ └── mod.rs
│ │ ├── mod.rs
│ │ ├── normalize_basic_blocks.rs
│ │ ├── normalize_identifiers.rs
│ │ ├── normalize_predicates.rs
│ │ ├── remove_unreachable_basic_blocks.rs
│ │ ├── replace_instructions_with_functions.rs
│ │ ├── replace_instructions_with_functions_fp_required.rs
│ │ ├── replace_known_functions.rs
│ │ ├── resolve_function_pointers.rs
│ │ └── test/
│ │ ├── expand_operands/
│ │ │ ├── immediate_conversion.ptx
│ │ │ ├── immediates.ptx
│ │ │ ├── mod.rs
│ │ │ ├── vector_extract.ptx
│ │ │ ├── vector_operand.ptx
│ │ │ └── vector_operand_convert.ptx
│ │ ├── insert_implicit_conversions/
│ │ │ ├── default.ptx
│ │ │ ├── default_reg_b32_reg_f16x2.ptx
│ │ │ ├── default_reg_b32_reg_v2_b16.ptx
│ │ │ ├── default_relaxed.ptx
│ │ │ └── mod.rs
│ │ ├── instruction_mode_to_global_mode/
│ │ │ ├── mod.rs
│ │ │ └── mode_conflict.ptx
│ │ ├── mod.rs
│ │ └── normalize_basic_blocks/
│ │ ├── mod.rs
│ │ └── trap.ptx
│ └── test/
│ ├── _Z9vectorAddPKfS0_Pfi.ptx
│ ├── ll/
│ │ ├── _attributes.ll
│ │ ├── abs.ll
│ │ ├── activemask.ll
│ │ ├── add.ll
│ │ ├── add_extended.ll
│ │ ├── add_ftz.ll
│ │ ├── add_non_coherent.ll
│ │ ├── add_s32_sat.ll
│ │ ├── add_tuning.ll
│ │ ├── addc_cc_s32.ll
│ │ ├── and.ll
│ │ ├── assertfail.ll
│ │ ├── atom_add.ll
│ │ ├── atom_add_float.ll
│ │ ├── atom_cas.ll
│ │ ├── atom_inc.ll
│ │ ├── b64tof64.ll
│ │ ├── bar_red_and_pred.ll
│ │ ├── bench.ll
│ │ ├── bfe.ll
│ │ ├── bfi.ll
│ │ ├── block.ll
│ │ ├── bmsk_clamp_b32.ll
│ │ ├── bra.ll
│ │ ├── brev.ll
│ │ ├── call.ll
│ │ ├── call_rnd.ll
│ │ ├── clz.ll
│ │ ├── const.ll
│ │ ├── const_ident.ll
│ │ ├── constant_f32.ll
│ │ ├── constant_negative.ll
│ │ ├── copysign.ll
│ │ ├── cos.ll
│ │ ├── cp_async.ll
│ │ ├── createpolicy.ll
│ │ ├── cvt_f16x2_f32.ll
│ │ ├── cvt_f64_f32.ll
│ │ ├── cvt_pack.ll
│ │ ├── cvt_relu_f16x2_f32.ll
│ │ ├── cvt_rn_bf16x2_f32.ll
│ │ ├── cvt_rn_f16x2_e4m3x2.ll
│ │ ├── cvt_rn_f16x2_e5m2x2.ll
│ │ ├── cvt_rn_satfinite_e4m3x2_f32.ll
│ │ ├── cvt_rn_satfinite_e5m2x2_f32.ll
│ │ ├── cvt_rni.ll
│ │ ├── cvt_rni_u16_f32.ll
│ │ ├── cvt_rzi.ll
│ │ ├── cvt_s16_s8.ll
│ │ ├── cvt_s32_f32.ll
│ │ ├── cvt_s64_s32.ll
│ │ ├── cvt_sat_s_u.ll
│ │ ├── cvta.ll
│ │ ├── div_approx.ll
│ │ ├── div_ftz.ll
│ │ ├── div_noftz.ll
│ │ ├── dp2a.ll
│ │ ├── dp4a.ll
│ │ ├── ex2.ll
│ │ ├── extern_func.ll
│ │ ├── extern_shared.ll
│ │ ├── extern_shared_call.ll
│ │ ├── fma.ll
│ │ ├── fma_bf16x2.ll
│ │ ├── fma_f16x2.ll
│ │ ├── fmax.ll
│ │ ├── global_array.ll
│ │ ├── global_array_f32.ll
│ │ ├── lanemask_le.ll
│ │ ├── lanemask_lt.ll
│ │ ├── ld_st.ll
│ │ ├── ld_st_implicit.ll
│ │ ├── ld_st_offset.ll
│ │ ├── ldmatrix.ll
│ │ ├── ldmatrix_trans.ll
│ │ ├── lg2.ll
│ │ ├── local_align.ll
│ │ ├── mad_extended.ll
│ │ ├── mad_s32.ll
│ │ ├── mad_wide.ll
│ │ ├── malformed_label.ll
│ │ ├── max.ll
│ │ ├── membar.ll
│ │ ├── min.ll
│ │ ├── min_f16.ll
│ │ ├── min_nan_f16.ll
│ │ ├── mma_m16n8k16_f32_bf16_bf16_f32.ll
│ │ ├── mma_m16n8k16_f32_bf16_bf16_f32_2x.ll
│ │ ├── mma_m16n8k16_f32_f16_f16_f32.ll
│ │ ├── mma_m16n8k32_s32_s8_s8_s32.ll
│ │ ├── mma_m16n8k32_s32_s8_s8_s32_interleave.ll
│ │ ├── mov.ll
│ │ ├── mov_address.ll
│ │ ├── mul24_hi_s32.ll
│ │ ├── mul24_hi_u32.ll
│ │ ├── mul24_lo_s32.ll
│ │ ├── mul24_lo_u32.ll
│ │ ├── mul_ftz.ll
│ │ ├── mul_hi.ll
│ │ ├── mul_lo.ll
│ │ ├── mul_non_ftz.ll
│ │ ├── mul_wide.ll
│ │ ├── multiple_return.ll
│ │ ├── nanosleep.ll
│ │ ├── neg.ll
│ │ ├── non_scalar_ptr_offset.ll
│ │ ├── noreturn.ll
│ │ ├── not.ll
│ │ ├── ntid.ll
│ │ ├── or.ll
│ │ ├── param_is_addressable.ll
│ │ ├── popc.ll
│ │ ├── pred_not.ll
│ │ ├── prmt.ll
│ │ ├── prmt_slow.ll
│ │ ├── rcp.ll
│ │ ├── redux_sync_add_u32_partial.ll
│ │ ├── redux_sync_op_s32.ll
│ │ ├── redux_sync_op_u32.ll
│ │ ├── reg_local.ll
│ │ ├── reg_multi.ll
│ │ ├── rem.ll
│ │ ├── rsqrt.ll
│ │ ├── sad_s64.ll
│ │ ├── selp.ll
│ │ ├── selp_true.ll
│ │ ├── set_f16.ll
│ │ ├── setp.ll
│ │ ├── setp_gt.ll
│ │ ├── setp_leu.ll
│ │ ├── setp_nan.ll
│ │ ├── setp_num.ll
│ │ ├── shared_ptr_32.ll
│ │ ├── shared_ptr_take_address.ll
│ │ ├── shared_unify_extern.ll
│ │ ├── shared_unify_local.ll
│ │ ├── shared_variable.ll
│ │ ├── shf_l.ll
│ │ ├── shf_l_clamp.ll
│ │ ├── shf_l_wrap.ll
│ │ ├── shf_r.ll
│ │ ├── shf_r_clamp.ll
│ │ ├── shf_r_wrap.ll
│ │ ├── shfl_sync_bfly_b32_pred.ll
│ │ ├── shfl_sync_down_b32_pred.ll
│ │ ├── shfl_sync_idx_b32_pred.ll
│ │ ├── shfl_sync_mode_b32.ll
│ │ ├── shfl_sync_up_b32_pred.ll
│ │ ├── shl.ll
│ │ ├── shr.ll
│ │ ├── shr_oob.ll
│ │ ├── sign_extend.ll
│ │ ├── sin.ll
│ │ ├── sqrt.ll
│ │ ├── sqrt_rn_ftz.ll
│ │ ├── stateful_ld_st_ntid.ll
│ │ ├── stateful_ld_st_ntid_chain.ll
│ │ ├── stateful_ld_st_ntid_sub.ll
│ │ ├── stateful_ld_st_simple.ll
│ │ ├── stateful_neg_offset.ll
│ │ ├── sub.ll
│ │ ├── sub_extended.ll
│ │ ├── subc_cc_s32.ll
│ │ ├── tanh.ll
│ │ ├── tid.ll
│ │ ├── trap.ll
│ │ ├── uint_to_fp_bf16.ll
│ │ ├── vector.ll
│ │ ├── vector4.ll
│ │ ├── vector8.ll
│ │ ├── vector8_extract.ll
│ │ ├── vector_extract.ll
│ │ ├── vector_operand.ll
│ │ ├── vote_all.ll
│ │ ├── vote_all_sub.ll
│ │ ├── vote_any.ll
│ │ ├── vote_ballot.ll
│ │ ├── warp_sz.ll
│ │ └── xor.ll
│ ├── mod.rs
│ ├── operands.ptx
│ ├── spirv_build/
│ │ ├── bar_sync.ptx
│ │ ├── global_extern_array.ptx
│ │ └── param_func_array_0.ptx
│ ├── spirv_fail/
│ │ ├── const_ptr.ptx
│ │ ├── global_ptr.ptx
│ │ ├── local_ptr.txt
│ │ ├── param_entry_array_0.ptx
│ │ ├── param_vector.ptx
│ │ ├── shared_ptr.ptx
│ │ └── shared_ptr2.ptx
│ ├── spirv_run/
│ │ ├── abs.ptx
│ │ ├── activemask.ptx
│ │ ├── add.ptx
│ │ ├── add_extended.ptx
│ │ ├── add_ftz.ptx
│ │ ├── add_non_coherent.ptx
│ │ ├── add_s32_sat.ptx
│ │ ├── add_tuning.ptx
│ │ ├── addc_cc_s32.ptx
│ │ ├── and.ptx
│ │ ├── assertfail.ptx
│ │ ├── atom_add.ptx
│ │ ├── atom_add_float.ptx
│ │ ├── atom_cas.ptx
│ │ ├── atom_inc.ptx
│ │ ├── atomics_128.ptx
│ │ ├── b64tof64.ptx
│ │ ├── bar_red_and_pred.ptx
│ │ ├── bfe.ptx
│ │ ├── bfi.ptx
│ │ ├── block.ptx
│ │ ├── bmsk_clamp_b32.ptx
│ │ ├── bra.ptx
│ │ ├── brev.ptx
│ │ ├── call.ptx
│ │ ├── call_rnd.ptx
│ │ ├── clz.ptx
│ │ ├── const.ptx
│ │ ├── const_ident.ptx
│ │ ├── constant_f32.ptx
│ │ ├── constant_negative.ptx
│ │ ├── copysign.ptx
│ │ ├── cos.ptx
│ │ ├── cp_async.ptx
│ │ ├── createpolicy.ptx
│ │ ├── cvt_f16x2_f32.ptx
│ │ ├── cvt_f64_f32.ptx
│ │ ├── cvt_pack.ptx
│ │ ├── cvt_relu_f16x2_f32.ptx
│ │ ├── cvt_rn_bf16x2_f32.ptx
│ │ ├── cvt_rn_f16x2_e4m3x2.ptx
│ │ ├── cvt_rn_f16x2_e5m2x2.ptx
│ │ ├── cvt_rn_satfinite_e4m3x2_f32.ptx
│ │ ├── cvt_rn_satfinite_e5m2x2_f32.ptx
│ │ ├── cvt_rni.ptx
│ │ ├── cvt_rni_u16_f32.ptx
│ │ ├── cvt_rzi.ptx
│ │ ├── cvt_s16_s8.ptx
│ │ ├── cvt_s32_f32.ptx
│ │ ├── cvt_s64_s32.ptx
│ │ ├── cvt_sat_s_u.ptx
│ │ ├── cvta.ptx
│ │ ├── div_approx.ptx
│ │ ├── div_ftz.ptx
│ │ ├── div_noftz.ptx
│ │ ├── dp2a.ptx
│ │ ├── dp4a.ptx
│ │ ├── ex2.ptx
│ │ ├── extern_func.ptx
│ │ ├── extern_shared.ptx
│ │ ├── extern_shared_call.ptx
│ │ ├── fma.ptx
│ │ ├── fma_bf16x2.ptx
│ │ ├── fma_f16x2.ptx
│ │ ├── fmax.ptx
│ │ ├── func_ptr.ptx
│ │ ├── global_array.ptx
│ │ ├── global_array_f32.ptx
│ │ ├── implicit_param.ptx
│ │ ├── lanemask_lt.ptx
│ │ ├── ld_st.ptx
│ │ ├── ld_st_implicit.ptx
│ │ ├── ld_st_offset.ptx
│ │ ├── ldmatrix.ptx
│ │ ├── ldmatrix_trans.ptx
│ │ ├── lg2.ptx
│ │ ├── local_align.ptx
│ │ ├── mad_extended.ptx
│ │ ├── mad_s32.ptx
│ │ ├── mad_wide.ptx
│ │ ├── malformed_label.ptx
│ │ ├── max.ptx
│ │ ├── membar.ptx
│ │ ├── min.ptx
│ │ ├── min_f16.ptx
│ │ ├── min_nan_f16.ptx
│ │ ├── mma_m16n8k16_f32_bf16_bf16_f32.ptx
│ │ ├── mma_m16n8k16_f32_bf16_bf16_f32_2x.ptx
│ │ ├── mma_m16n8k16_f32_f16_f16_f32.ptx
│ │ ├── mma_m16n8k32_s32_s8_s8_s32.ptx
│ │ ├── mma_m16n8k32_s32_s8_s8_s32_interleave.ptx
│ │ ├── mod.rs
│ │ ├── mov.ptx
│ │ ├── mov_address.ptx
│ │ ├── mul24_hi_s32.ptx
│ │ ├── mul24_hi_u32.ptx
│ │ ├── mul24_lo_s32.ptx
│ │ ├── mul24_lo_u32.ptx
│ │ ├── mul_ftz.ptx
│ │ ├── mul_hi.ptx
│ │ ├── mul_lo.ptx
│ │ ├── mul_non_ftz.ptx
│ │ ├── mul_wide.ptx
│ │ ├── multiple_return.ptx
│ │ ├── nanosleep.ptx
│ │ ├── neg.ptx
│ │ ├── non_scalar_ptr_offset.ptx
│ │ ├── noreturn.ptx
│ │ ├── not.ptx
│ │ ├── ntid.ptx
│ │ ├── or.ptx
│ │ ├── param_is_addressable.ptx
│ │ ├── popc.ptx
│ │ ├── pred_not.ptx
│ │ ├── prmt.ptx
│ │ ├── prmt_slow.ptx
│ │ ├── rcp.ptx
│ │ ├── redux_sync_add_u32_partial.ptx
│ │ ├── redux_sync_op_s32.ptx
│ │ ├── redux_sync_op_u32.ptx
│ │ ├── reg_local.ptx
│ │ ├── reg_multi.ptx
│ │ ├── rem.ptx
│ │ ├── rsqrt.ptx
│ │ ├── sad_s64.ptx
│ │ ├── selp.ptx
│ │ ├── selp_true.ptx
│ │ ├── set_f16.ptx
│ │ ├── setp.ptx
│ │ ├── setp_gt.ptx
│ │ ├── setp_leu.ptx
│ │ ├── setp_nan.ptx
│ │ ├── setp_num.ptx
│ │ ├── shared_ptr_32.ptx
│ │ ├── shared_ptr_take_address.ptx
│ │ ├── shared_unify_extern.ptx
│ │ ├── shared_unify_local.ptx
│ │ ├── shared_variable.ptx
│ │ ├── shf_l.ptx
│ │ ├── shf_l_clamp.ptx
│ │ ├── shf_l_wrap.ptx
│ │ ├── shf_r.ptx
│ │ ├── shf_r_clamp.ptx
│ │ ├── shf_r_wrap.ptx
│ │ ├── shfl_sync_bfly_b32_pred.ptx
│ │ ├── shfl_sync_down_b32_pred.ptx
│ │ ├── shfl_sync_idx_b32_pred.ptx
│ │ ├── shfl_sync_mode_b32.ptx
│ │ ├── shfl_sync_up_b32_pred.ptx
│ │ ├── shl.ptx
│ │ ├── shr.ptx
│ │ ├── shr_oob.ptx
│ │ ├── sign_extend.ptx
│ │ ├── sin.ptx
│ │ ├── sqrt.ptx
│ │ ├── sqrt_rn_ftz.ptx
│ │ ├── stateful_ld_st_ntid.ptx
│ │ ├── stateful_ld_st_ntid_chain.ptx
│ │ ├── stateful_ld_st_ntid_sub.ptx
│ │ ├── stateful_ld_st_simple.ptx
│ │ ├── stateful_neg_offset.ptx
│ │ ├── sub.ptx
│ │ ├── sub_extended.ptx
│ │ ├── subc_cc_s32.ptx
│ │ ├── tanh.ptx
│ │ ├── tid.ptx
│ │ ├── trap.ptx
│ │ ├── uint_to_fp_bf16.ptx
│ │ ├── vector.ptx
│ │ ├── vector4.ptx
│ │ ├── vector8.ptx
│ │ ├── vector8_extract.ptx
│ │ ├── vector_extract.ptx
│ │ ├── vector_operand.ptx
│ │ ├── verify.py
│ │ ├── vote_all.ptx
│ │ ├── vote_all_sub.ptx
│ │ ├── vote_any.ptx
│ │ ├── vote_ballot.ptx
│ │ ├── warp_sz.ptx
│ │ └── xor.ptx
│ ├── vectorAdd_11.ptx
│ └── vectorAdd_kernel64.ptx
├── ptx_parser/
│ ├── Cargo.toml
│ └── src/
│ ├── ast.rs
│ ├── check_args.py
│ └── lib.rs
├── ptx_parser_macros/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── ptx_parser_macros_impl/
│ ├── Cargo.toml
│ └── src/
│ ├── lib.rs
│ └── parser.rs
├── ptxas/
│ ├── Cargo.toml
│ └── src/
│ └── main.rs
├── xtask/
│ ├── Cargo.toml
│ └── src/
│ └── main.rs
├── zluda/
│ ├── Cargo.toml
│ ├── build.rs
│ └── src/
│ ├── impl/
│ │ ├── context.rs
│ │ ├── device.rs
│ │ ├── driver.rs
│ │ ├── event.rs
│ │ ├── function.rs
│ │ ├── graph.rs
│ │ ├── hipfix.rs
│ │ ├── kernel.rs
│ │ ├── library.rs
│ │ ├── memory.rs
│ │ ├── mod.rs
│ │ ├── module.rs
│ │ ├── os_unix.rs
│ │ ├── os_win.rs
│ │ ├── pointer.rs
│ │ └── stream.rs
│ ├── lib.rs
│ ├── os_unix.rs
│ ├── os_win.rs
│ └── tests.rs
├── zluda_bindgen/
│ ├── Cargo.toml
│ ├── build/
│ │ ├── cublasLt_internal.h
│ │ ├── cublas_wrapper.h
│ │ ├── cuda_wrapper.h
│ │ ├── cudnn_v8/
│ │ │ ├── cudnn_adv_infer.h
│ │ │ ├── cudnn_adv_train.h
│ │ │ ├── cudnn_backend.h
│ │ │ ├── cudnn_cnn_infer.h
│ │ │ ├── cudnn_cnn_train.h
│ │ │ ├── cudnn_ops_infer.h
│ │ │ ├── cudnn_ops_train.h
│ │ │ └── cudnn_version.h
│ │ ├── cufft_wraper.h
│ │ └── decompile_cublaslt_internal.py
│ └── src/
│ ├── main.rs
│ └── process_table.rs
├── zluda_blas/
│ ├── Cargo.toml
│ ├── build.rs
│ └── src/
│ ├── impl.rs
│ ├── lib.rs
│ └── tests.rs
├── zluda_blaslt/
│ ├── Cargo.toml
│ ├── build.rs
│ └── src/
│ ├── impl.rs
│ └── lib.rs
├── zluda_cache/
│ ├── Cargo.toml
│ ├── diesel.toml
│ ├── migrations/
│ │ ├── .keep
│ │ └── 2025-08-04-203347_create_initial/
│ │ ├── down.sql
│ │ └── up.sql
│ └── src/
│ ├── lib.rs
│ ├── models.rs
│ └── schema.rs
├── zluda_common/
│ ├── Cargo.toml
│ └── src/
│ ├── constants.rs
│ ├── lib.rs
│ ├── os_unix.rs
│ └── os_win.rs
├── zluda_dnn/
│ ├── Cargo.toml
│ └── src/
│ ├── impl.rs
│ └── lib.rs
├── zluda_dnn8/
│ ├── Cargo.toml
│ ├── build.rs
│ └── src/
│ └── lib.rs
├── zluda_dnn9/
│ ├── Cargo.toml
│ ├── build.rs
│ └── src/
│ ├── lib.rs
│ └── tests.rs
├── zluda_fft/
│ ├── Cargo.toml
│ └── src/
│ ├── impl.rs
│ └── lib.rs
├── zluda_inject/
│ ├── Cargo.toml
│ ├── build.rs
│ ├── src/
│ │ ├── args.rs
│ │ ├── bin.rs
│ │ ├── main.rs
│ │ └── win.rs
│ └── tests/
│ ├── helpers/
│ │ ├── direct_cuinit.rs
│ │ ├── do_cuinit.rs
│ │ ├── do_cuinit_early.rs
│ │ ├── do_cuinit_late.rs
│ │ ├── do_cuinit_late_clr.cs
│ │ ├── indirect_cuinit.rs
│ │ └── subprocess.rs
│ └── inject.rs
├── zluda_ld/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── zluda_ml/
│ ├── Cargo.toml
│ └── src/
│ ├── impl_common.rs
│ ├── impl_unix.rs
│ ├── impl_win.rs
│ └── lib.rs
├── zluda_precompile/
│ ├── Cargo.toml
│ └── src/
│ └── main.rs
├── zluda_redirect/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── zluda_sparse/
│ ├── Cargo.toml
│ ├── build.rs
│ └── src/
│ ├── impl.rs
│ └── lib.rs
├── zluda_trace/
│ ├── Cargo.toml
│ └── src/
│ ├── dark_api.rs
│ ├── lib.rs
│ ├── log.rs
│ ├── os_unix.rs
│ ├── os_win.rs
│ └── trace.rs
├── zluda_trace_blas/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── zluda_trace_blaslt/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── zluda_trace_common/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── zluda_trace_dnn8/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── zluda_trace_dnn9/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── zluda_trace_fft/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── zluda_trace_nvml/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── zluda_trace_sparse/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
└── zluda_windows/
├── Cargo.toml
├── library.manifest
├── manifest.rc
└── src/
└── lib.rs
================================================
FILE CONTENTS
================================================
================================================
FILE: .cargo/config.toml
================================================
[alias]
xtask = "run --package xtask --"
[target.x86_64-pc-windows-msvc]
rustflags = ["-Ctarget-feature=+crt-static"]
================================================
FILE: .devcontainer/Dockerfile
================================================
FROM nvidia/cuda:13.0.1-base-ubuntu24.04
RUN DEBIAN_FRONTEND=noninteractive apt update -y && DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends \
wget \
build-essential \
cmake \
ninja-build \
python3 \
ripgrep \
git \
ltrace \
# required by llvm 17
lsb-release software-properties-common gnupg
ARG LLVM_VERSION=17
RUN wget https://apt.llvm.org/llvm.sh && \
chmod +x llvm.sh && \
./llvm.sh ${LLVM_VERSION}
# Feel free to change to a newer version if you have a newer version on your host
ARG CUDA_PKG_VERSION=13-0
# Docker <-> host driver version compatibility is newer host <-> older docker
# Driver 580+ is required for CUDA 13
ARG CUDA_DRIVER=580
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/libcudnn8-dev_8.9.7.29-1+cuda12.2_amd64.deb && \
dpkg-deb -R libcudnn8-dev_8.9.7.29-1+cuda12.2_amd64.deb /opt && \
rm libcudnn8-dev_8.9.7.29-1+cuda12.2_amd64.deb
RUN DEBIAN_FRONTEND=noninteractive apt update -y && DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends \
# CUDA headers need it for interop
libgl-dev libegl-dev libvdpau-dev \
nvidia-headless-no-dkms-${CUDA_DRIVER}-open \
cuda-cudart-dev-${CUDA_PKG_VERSION} \
cuda-nvml-dev-${CUDA_PKG_VERSION} \
cuda-cudart-${CUDA_PKG_VERSION} \
cuda-profiler-api-${CUDA_PKG_VERSION} \
cuda-nvcc-${CUDA_PKG_VERSION} \
cudnn9-cuda-${CUDA_PKG_VERSION} \
libcufft-dev-${CUDA_PKG_VERSION} \
libcublas-dev-${CUDA_PKG_VERSION} \
libcusparse-dev-${CUDA_PKG_VERSION}
ARG ROCM_VERSION=6.4.4
RUN mkdir --parents --mode=0755 /etc/apt/keyrings && \
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null && \
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${ROCM_VERSION} noble main" > /etc/apt/sources.list.d/rocm.list && \
echo 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' > /etc/apt/preferences.d/rocm-pin-600 && \
DEBIAN_FRONTEND=noninteractive apt update -y && DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends \
rocminfo \
rocm-gdb \
rocm-smi-lib amd-smi-lib \
rocm-llvm-dev \
hip-runtime-amd \
miopen-hip-dev \
rocfft-dev \
rocblas-dev \
hipblaslt-dev \
rocsolver-dev \
rocsparse-dev \
hip-dev && \
echo '/opt/rocm/lib' > /etc/ld.so.conf.d/rocm.conf && \
ldconfig
ENV PATH=$PATH:/opt/rocm-${ROCM_VERSION}/bin
================================================
FILE: .devcontainer/devcontainer.json
================================================
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/rust
{
"name": "zluda",
"build": {
"dockerfile": "Dockerfile"
},
"securityOpt": [ "seccomp=unconfined" ],
"runArgs": [
//"--runtime=nvidia",
"--device=/dev/kfd",
"--device=/dev/dri",
"--group-add=video"
],
"mounts": [
{
"source": "${localEnv:HOME}/.cargo/",
"target": "/root/.cargo",
"type": "bind"
}
],
// https://containers.dev/features.
"features": {
"ghcr.io/devcontainers/features/rust:1": {}
},
// https://aka.ms/dev-containers-non-root.
"remoteUser": "root",
"hostRequirements": { "gpu": true },
"customizations": {
"vscode": {
"extensions": [ "mhutchie.git-graph" ]
}
},
"containerEnv": {
"NVIDIA_DISABLE_REQUIRE": "1"
}
}
================================================
FILE: .git-blame-ignore-revs
================================================
21ef5f60a3a5efa17855a30f6b5c7d1968cd46ba
================================================
FILE: .gitattributes
================================================
ext/** linguist-vendored
*.dll filter=lfs diff=lfs merge=lfs -text
*.bc filter=lfs diff=lfs merge=lfs -text
================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: true
================================================
FILE: .github/ISSUE_TEMPLATE/zluda_dump.yml
================================================
name: Bug Report
description: Report an issue with ZLUDA
body:
- type: markdown
attributes:
value: |
If you are reporting an application that is not supported by ZLUDA, please use zluda_trace to
create logs. See instructions here: https://zluda.readthedocs.io/latest/troubleshooting.html
- type: textarea
id: logs
attributes:
label: zluda_trace logs (tarball/zip file)
description: |
Please create a tarball (`.tar.gz`) or zip file (`.zip`) of your log directory and attach
it here. You can drag and drop files directly into the comment box. Please also include
zluda_trace logs using CUDA if you have NVIDIA hardware to test on.
placeholder: Attach file (e.g., drag and drop)
validations:
required: false
- type: textarea
id: description
attributes:
label: Description
description: |
Describe the issue you've encountered. What is the expected behavior? What is the actual
behavior?
placeholder: Description
validations:
required: false
- type: textarea
id: reproduce
attributes:
label: Steps to reproduce
description: |
Please describe the application you were running and provide clear, step-by-step
instructions to run it.
placeholder: |
example:
1. Download llm.c: git clone https://github.com/karpathy/llm.c.git
2. Navigate to the directory: cd llm.c
3. Download the model and train it:
chmod u+x ./dev/download_starter_pack.sh
./dev/download_starter_pack.sh
make train_gpt2fp32cu
./train_gpt2fp32cu
4. Build and run the tests:
make test_gpt2fp32cu
LD_LIBRARY_PATH=<ZLUDA_LOG_DIR> ./test_gpt2fp32cu
validations:
required: true
- type: input
id: version
attributes:
label: ZLUDA version
description: What version of ZLUDA are you using? Due to legal issues **versions older than 4 are not supported**
placeholder: "example: 5-preview.113"
validations:
required: true
- type: input
id: os
attributes:
label: Operating System
description: What operating system are you using? (e.g., distribution and version)
placeholder: "example: Ubuntu 22.04.5 LTS"
validations:
required: true
- type: input
id: gpu
attributes:
label: GPU
description: What GPU are you using?
placeholder: "example: AMD Radeon RX 6600"
validations:
required: true
================================================
FILE: .github/workflows/move_tests.sh
================================================
#!/bin/bash
# Moves test executables into a per-suffix directory.
#
# Usage: move_tests.sh <test_executables_dir> <suffix>
#
# Every file in <test_executables_dir> is run with `--list`; if the listing
# contains a line ending in `_<suffix>: test`, the executable is moved to
# `<test_executables_dir>/../<suffix>/`.
set -ex
TEST_EXECUTABLES_DIR=$1
SUFFIX=$2
ls "${TEST_EXECUTABLES_DIR}"/* | sort -u | while read -r executable; do
    # Run `--list` inside the `if` condition: under `set -e` a failing plain
    # assignment would abort the whole script, whereas the intent is to skip
    # files that cannot list their tests.
    if output=$("$executable" --list 2>/dev/null) && echo "$output" | grep -q "_${SUFFIX}: test$"; then
        mv "$executable" "${TEST_EXECUTABLES_DIR}/../${SUFFIX}/"
    fi
done
================================================
FILE: .github/workflows/nightly_tests.yml
================================================
name: Nightly tests
on:
workflow_call:
workflow_dispatch:
env:
ROCM_VERSION: "6.3.4"
AMDGPU_VERSION: "6.4.4"
TEST_THREADS: 24
jobs:
run_tests:
runs-on: gpu_large
steps:
- uses: actions/checkout@v4
with:
repository: 'vosen/ZLUDA'
path: zluda-src
sparse-checkout: |
.github/workflows/rocm_setup_run.sh
- name: Install ROCm
run: sudo bash zluda-src/.github/workflows/rocm_setup_run.sh ${{ env.ROCM_VERSION }} ${{ env.AMDGPU_VERSION }}
- uses: actions/checkout@v4
with:
repository: 'vosen/ptx_tests'
- uses: robinraju/release-downloader@v1
with:
repository: 'vosen/ZLUDA'
latest: true
preRelease: true
extract: true
fileName: 'zluda-linux-*.tar.gz'
- name: Build and run
run: |
DEBIAN_FRONTEND=noninteractive sudo apt install -y --no-install-recommends curl
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --default-toolchain nightly --profile minimal -y
source ~/.cargo/env
# Launch one background shard per test thread, each logging to its own file.
pids=()
for i in $(seq 0 $((${{ env.TEST_THREADS }} - 1))); do
    cargo run -r -- zluda/libcuda.so.1 --shard-index $i --shard-count ${{ env.TEST_THREADS }} > output_$i.log 2>&1 &
    pids+=($!)
done
# Wait for every shard before reporting the result. The step runs under
# `bash -e`, so a bare `wait` on a failed shard would end the step while later
# shards are still running and writing their logs; the `||` guard prevents
# errexit from firing and records the failure instead.
error_occurred=0
for pid in "${pids[@]}"; do
    wait $pid || error_occurred=1
done
exit $error_occurred
- name: Upload logs
if: always()
uses: actions/upload-artifact@v4
with:
name: output_logs
path: output_*.log
================================================
FILE: .github/workflows/pr_master.yml
================================================
name: ZLUDA
on:
pull_request:
branches: [ master ]
env:
CARGO_TERM_COLOR: always
CARGO_PROFILE: release
SCCACHE_GHA_ENABLED: "true"
RUSTC_WRAPPER: "sccache"
SCCACHE_MAX_FRAME_LENGTH: "104857600" # 100 MB
ROCM_VERSION: "6.3.4"
AMDGPU_VERSION: "6.4.4"
jobs:
check_whitespace:
  name: Check Whitespace
  runs-on: ubuntu-22.04
  steps:
    # Without a checkout the workspace is empty and the check below would
    # pass without examining any file.
    - uses: actions/checkout@v4
    # For every tracked text file, diff it against a copy with trailing
    # whitespace stripped and a final newline ensured; any difference makes
    # `diff` (and therefore the step) fail.
    - run: |
        sudo apt install fd-find
        fdfind \
        --exclude '*.bc' \
        --exclude '*.exe' \
        --exclude '*.lib' \
        --exclude ext/detours \
        --strip-cwd-prefix \
        --type file \
        --exec bash -c '
        diff \
        --unified \
        --label "a/$0" \
        --label "b/$0" \
        <(cat "$0") \
        <(sed --regexp-extended "s/\s+$//; \$a\\" "$0")
        '
formatting:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
components: rustfmt
- name: Check Rust formatting
uses: actions-rust-lang/rustfmt@v1
# Builds the Linux package with `cargo xtask zip` and uploads the unpacked
# result as a workflow artifact named after the short commit hash.
build_linux:
name: Build (Linux)
runs-on: ubuntu-22.04
steps:
- uses: jlumbroso/free-disk-space@v1.3.1
- uses: actions/checkout@v4
with:
lfs: true
submodules: true
- name: Install ROCm
run: sudo bash .github/workflows/rocm_setup_build.sh ${{ env.ROCM_VERSION }}
- name: Run sccache-cache
uses: mozilla-actions/sccache-action@v0.0.9
# The archive produced by xtask is unpacked into a directory so that the
# upload step below receives plain files rather than the tarball itself.
- name: Build
# https://github.com/actions/upload-artifact/issues/39
run: |
cargo xtask zip --profile ${{ env.CARGO_PROFILE }}
mkdir target/${{ env.CARGO_PROFILE }}/zluda
tar -xzf target/${{ env.CARGO_PROFILE }}/zluda.tar.gz -C target/${{ env.CARGO_PROFILE }}/zluda
# Export SHORT_SHA to the following steps through GITHUB_ENV.
- name: Set revision hash
run: echo "SHORT_SHA=$(git rev-parse --short HEAD)" >> $GITHUB_ENV
- name: Upload
uses: actions/upload-artifact@v4
with:
name: zluda-linux-${{ env.SHORT_SHA }}
path: target/${{ env.CARGO_PROFILE }}/zluda
# Builds the Windows package with `cargo xtask zip` and uploads the unpacked
# result as a workflow artifact named after the short commit hash.
# Unlike the Linux build, this job has no ROCm installation step.
build_windows:
name: Build (Windows)
runs-on: windows-2022
steps:
- uses: actions/checkout@v4
with:
lfs: true
submodules: true
- name: Run sccache-cache
uses: mozilla-actions/sccache-action@v0.0.9
# The zip produced by xtask is expanded into a directory that the upload
# step below picks up.
- name: Build
run: |
cargo xtask zip --profile ${{ env.CARGO_PROFILE }}
Expand-Archive -Path target/${{ env.CARGO_PROFILE }}/zluda.zip -DestinationPath target/${{ env.CARGO_PROFILE }}/zluda
# SHORT_SHA is the first 7 characters of github.sha, exported through GITHUB_ENV.
- name: Set revision hash
run: echo "SHORT_SHA=$("${{ github.sha }}".SubString(0, 7))" >> $env:GITHUB_ENV
- name: Upload
uses: actions/upload-artifact@v4
with:
name: zluda-windows-${{ env.SHORT_SHA }}
path: target/${{ env.CARGO_PROFILE }}/zluda
# Compiles the workspace test binaries on the GPU runner and uploads them as
# the `tests` artifact. The artifact id is exposed as the `test_package`
# output so that `run_tests` can download exactly this upload.
build_tests:
name: Build AMD GPU unit tests
runs-on: gpu_small
outputs:
test_package: ${{ steps.upload_artifacts.outputs.artifact-id }}
steps:
- uses: jlumbroso/free-disk-space@v1.3.1
- name: Install build tools
run: |
sudo apt update
sudo apt install -y git git-lfs build-essential cmake
- uses: actions/checkout@v4
with:
lfs: true
submodules: true
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
rustflags: ""
- name: Install ROCm
run: sudo bash .github/workflows/rocm_setup_build.sh ${{ env.ROCM_VERSION }}
- name: Run sccache-cache
uses: mozilla-actions/sccache-action@v0.0.9
- uses: taiki-e/install-action@v2
with:
tool: cargo-export
# `cargo export` builds the test executables into target/tests; move_tests.sh
# then populates target/amdgpu, whose contents are stripped before upload.
# NOTE(review): move_tests.sh is not visible here - confirm what it selects.
- name: Build
run: |
cargo export target/tests -- test --features ci_build --workspace \
--exclude cuda_macros \
--exclude ptx_parser_macros \
--exclude zluda_inject \
--exclude zluda_redirect
mkdir -p target/amdgpu
bash .github/workflows/move_tests.sh target/tests amdgpu
strip target/amdgpu/*
- name: Upload
id: upload_artifacts
uses: actions/upload-artifact@v4
with:
name: tests
path: target/amdgpu
retention-days: 7
# Downloads the test binaries produced by `build_tests` and runs each of them
# on the GPU runner.
run_tests:
name: Run AMD GPU unit tests
runs-on: gpu_small
needs: [build_tests]
steps:
# Only the .github directory is needed here (for rocm_setup_run.sh).
- uses: actions/checkout@v4
with:
submodules: false
sparse-checkout: .github
- name: Install ROCm
run: sudo bash .github/workflows/rocm_setup_run.sh ${{ env.ROCM_VERSION }} ${{ env.AMDGPU_VERSION }}
- uses: actions/download-artifact@v4
with:
artifact-ids: ${{ needs.build_tests.outputs.test_package }}
path: target
# Every executable is run with the `_amdgpu` argument; a failure is recorded
# but does not stop the loop, and the step fails at the end if any run failed.
# NOTE(review): assumes the artifact is extracted to target/tests - confirm.
- name: Run tests
run: |
chmod +x target/tests/*
error_occurred=0
for exe in target/tests/*; do
./"$exe" _amdgpu || { error_occurred=1; true; }
done
exit $error_occurred
================================================
FILE: .github/workflows/push_master.yml
================================================
name: ZLUDA
on:
workflow_dispatch:
push:
branches: [ master ]
env:
CARGO_TERM_COLOR: always
CARGO_PROFILE: release-lto
SCCACHE_GHA_ENABLED: "true"
RUSTC_WRAPPER: "sccache"
SCCACHE_MAX_FRAME_LENGTH: "104857600" # 100 MB
ROCM_VERSION: "6.3.4"
AMDGPU_VERSION: "6.4.4"
jobs:
# Builds the Linux package on every push to master, uploads it as a workflow
# artifact and attaches the tarball to a pre-release.
build_linux:
name: Build (Linux)
runs-on: ubuntu-22.04
permissions:
contents: write
steps:
- uses: jlumbroso/free-disk-space@v1.3.1
- uses: actions/checkout@v4
# fetch-depth and fetch-tags are required to properly tag pre-release builds
with:
fetch-depth: 0
fetch-tags: true
lfs: true
submodules: true
- name: Install ROCm
run: sudo bash .github/workflows/rocm_setup_build.sh ${{ env.ROCM_VERSION }}
- name: Run sccache-cache
uses: mozilla-actions/sccache-action@v0.0.9
- name: Build
# https://github.com/actions/upload-artifact/issues/39
run: |
cargo xtask zip --profile ${{ env.CARGO_PROFILE }}
mkdir target/${{ env.CARGO_PROFILE }}/zluda
tar -xzf target/${{ env.CARGO_PROFILE }}/zluda.tar.gz -C target/${{ env.CARGO_PROFILE }}/zluda
# Export SHORT_SHA to the following steps through GITHUB_ENV.
- name: Set revision hash
run: echo "SHORT_SHA=$(git rev-parse --short HEAD)" >> $GITHUB_ENV
- name: Upload
uses: actions/upload-artifact@v4
with:
name: zluda-linux-${{ env.SHORT_SHA }}
path: target/${{ env.CARGO_PROFILE }}/zluda
# Renames the tarball after the commit and derives the pre-release version:
# take the highest tag of the form `v<number>`, add one to the number, and
# append `-preview.<commits since that tag>`. The result is published as the
# step output VERSION.
- name: Prepare artifact for release
run: |
mv target/${{ env.CARGO_PROFILE }}/zluda.tar.gz target/${{ env.CARGO_PROFILE }}/zluda-linux-${{ env.SHORT_SHA }}.tar.gz
latest_tag=$(git tag -l "v*" | grep -E "^v[0-9]+$" | sort -V | tail -n 1)
next_version="$((${latest_tag:1} + 1))"
offset=$(git rev-list $latest_tag..HEAD --count)
echo "VERSION=$next_version-preview.$offset" >> $GITHUB_OUTPUT
id: prepare_artifacts
# allowUpdates lets this job add its artifact to a release for the same tag
# that the Windows job also targets.
- uses: ncipollo/release-action@v1
with:
prerelease: true
generateReleaseNotes: true
allowUpdates: true
omitNameDuringUpdate: true
artifacts: "target/${{ env.CARGO_PROFILE }}/zluda-linux-${{ env.SHORT_SHA }}.tar.gz"
name: "Version ${{ steps.prepare_artifacts.outputs.VERSION }}"
tag: "v${{ steps.prepare_artifacts.outputs.VERSION }}"
# Builds the Windows package on every push to master, uploads it as a workflow
# artifact and attaches the zip to a pre-release.
build_windows:
name: Build (Windows)
runs-on: windows-2022
permissions:
contents: write
steps:
- uses: actions/checkout@v4
# fetch-depth and fetch-tags are required to properly tag pre-release builds
with:
fetch-depth: 0
fetch-tags: true
lfs: true
submodules: true
- name: Run sccache-cache
uses: mozilla-actions/sccache-action@v0.0.9
- name: Build
run: |
cargo xtask zip --profile ${{ env.CARGO_PROFILE }}
Expand-Archive -Path target/${{ env.CARGO_PROFILE }}/zluda.zip -DestinationPath target/${{ env.CARGO_PROFILE }}/zluda
# SHORT_SHA is the first 7 characters of github.sha, exported through GITHUB_ENV.
- name: Set revision hash
run: echo "SHORT_SHA=$("${{ github.sha }}".SubString(0, 7))" >> $env:GITHUB_ENV
- name: Upload
uses: actions/upload-artifact@v4
with:
name: zluda-windows-${{ env.SHORT_SHA }}
path: target/${{ env.CARGO_PROFILE }}/zluda
# Runs under bash (not the default Windows shell) so that the version
# computation is the same script as in the Linux job: highest `v<number>` tag,
# plus one, followed by `-preview.<commits since that tag>`.
- name: Prepare artifact for release
shell: bash
working-directory: ${{ github.workspace }}
run: |
mv target/${{ env.CARGO_PROFILE }}/zluda.zip target/${{ env.CARGO_PROFILE }}/zluda-windows-${{ env.SHORT_SHA }}.zip
latest_tag=$(git tag -l "v*" | grep -E "^v[0-9]+$" | sort -V | tail -n 1)
next_version="$((${latest_tag:1} + 1))"
offset=$(git rev-list $latest_tag..HEAD --count)
echo "VERSION=$next_version-preview.$offset" >> $GITHUB_OUTPUT
id: prepare_artifacts
# allowUpdates lets this job add its artifact to a release for the same tag
# that the Linux job also targets.
- uses: ncipollo/release-action@v1
with:
prerelease: true
generateReleaseNotes: true
allowUpdates: true
omitNameDuringUpdate: true
artifacts: "target/${{ env.CARGO_PROFILE }}/zluda-windows-${{ env.SHORT_SHA }}.zip"
name: "Version ${{ steps.prepare_artifacts.outputs.VERSION }}"
tag: "v${{ steps.prepare_artifacts.outputs.VERSION }}"
# Compiles the workspace test binaries on the GPU runner and uploads them as
# the `tests` artifact. The artifact id is exposed as the `test_package`
# output so that `run_tests` can download exactly this upload.
build_tests:
  name: Build AMD GPU unit tests
  runs-on: gpu_small
  outputs:
    test_package: ${{ steps.upload_artifacts.outputs.artifact-id }}
  steps:
    - uses: jlumbroso/free-disk-space@v1.3.1
    - name: Install build tools
      run: |
        sudo apt update
        sudo apt install -y git git-lfs build-essential cmake
    - uses: actions/checkout@v4
      with:
        lfs: true
        submodules: true
    - uses: actions-rust-lang/setup-rust-toolchain@v1
      with:
        rustflags: ""
    # rocm_setup_build.sh reads only its first argument (the ROCm version);
    # the AMDGPU driver version is needed by rocm_setup_run.sh alone.
    - name: Install ROCm
      run: sudo bash .github/workflows/rocm_setup_build.sh ${{ env.ROCM_VERSION }}
    - name: Run sccache-cache
      uses: mozilla-actions/sccache-action@v0.0.9
    - uses: taiki-e/install-action@v2
      with:
        tool: cargo-export
    # `cargo export` builds the test executables into target/tests; move_tests.sh
    # then populates target/amdgpu, whose contents are stripped before upload.
    - name: Build
      run: |
        cargo export target/tests -- test --features ci_build --workspace \
          --exclude cuda_macros \
          --exclude ptx_parser_macros \
          --exclude zluda_inject \
          --exclude zluda_redirect
        mkdir -p target/amdgpu
        bash .github/workflows/move_tests.sh target/tests amdgpu
        strip target/amdgpu/*
    - name: Upload
      id: upload_artifacts
      uses: actions/upload-artifact@v4
      with:
        name: tests
        path: target/amdgpu
        retention-days: 7
# Downloads the test binaries produced by `build_tests` and runs each of them
# on the GPU runner.
run_tests:
name: Run AMD GPU unit tests
runs-on: gpu_small
needs: [build_tests]
steps:
# Only the .github directory is needed here (for rocm_setup_run.sh).
- uses: actions/checkout@v4
with:
submodules: false
sparse-checkout: .github
- name: Install ROCm
run: sudo bash .github/workflows/rocm_setup_run.sh ${{ env.ROCM_VERSION }} ${{ env.AMDGPU_VERSION }}
- uses: actions/download-artifact@v4
with:
artifact-ids: ${{ needs.build_tests.outputs.test_package }}
path: target
# Every executable is run with the `_amdgpu` argument; a failure is recorded
# but does not stop the loop, and the step fails at the end if any run failed.
# NOTE(review): assumes the artifact is extracted to target/tests - confirm.
- name: Run tests
run: |
chmod +x target/tests/*
error_occurred=0
for exe in target/tests/*; do
./"$exe" _amdgpu || { error_occurred=1; true; }
done
exit $error_occurred
================================================
FILE: .github/workflows/rocm_setup_build.sh
================================================
#!/bin/bash
# Installs the ROCm development packages used to build ZLUDA from the
# repo.radeon.com apt repository for Ubuntu "jammy".
#
# Usage: rocm_setup_build.sh <rocm-version>
# Writes below /etc, so it has to run as root (the workflows call it via sudo).
#
# pipefail: without it a failed key download is masked by the exit status of
# the trailing `gpg | tee` and the script would carry on with an empty keyring.
set -exo pipefail

# Abort with a usage message instead of composing a broken repository URL.
ROCM_VERSION=${1:?usage: rocm_setup_build.sh <rocm-version>}

DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends gpg patchelf

# Source: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/install-methods/package-manager/package-manager-ubuntu.html
mkdir --parents --mode=0755 /etc/apt/keyrings
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
    gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null

# Quoted so that `[arch=... signed-by=...]` can never be treated as a glob
# pattern and the version is not subject to word splitting.
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION jammy main" | tee /etc/apt/sources.list.d/rocm.list

# Prefer packages from repo.radeon.com over same-named distribution packages.
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' \
    | tee /etc/apt/preferences.d/rocm-pin-600

DEBIAN_FRONTEND=noninteractive apt update -y
DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends rocm-smi-lib rocm-llvm-dev hip-runtime-amd hip-dev rocblas-dev hipblaslt-dev miopen-hip-dev rocsparse-dev

# Make the ROCm tools and libraries discoverable. Single quotes keep $PATH
# literal so it is expanded when the profile script is sourced, not here.
echo 'export PATH="$PATH:/opt/rocm/bin"' | tee /etc/profile.d/rocm.sh
echo "/opt/rocm/lib" | tee /etc/ld.so.conf.d/rocm.conf
ldconfig
================================================
FILE: .github/workflows/rocm_setup_run.sh
================================================
#!/bin/bash
# Installs the AMD GPU kernel driver and the HIP runtime needed to run the
# ZLUDA GPU tests, from the repo.radeon.com apt repositories for Ubuntu "noble".
#
# Usage: rocm_setup_run.sh <rocm-version> <amdgpu-version>
# Writes below /etc and loads a kernel module, so it has to run as root
# (the workflows call it via sudo).
#
# pipefail: without it a failed key download is masked by the exit status of
# the trailing `gpg | tee` and the script would carry on with an empty keyring.
set -exo pipefail

# Abort with a usage message instead of composing broken repository URLs.
ROCM_VERSION=${1:?usage: rocm_setup_run.sh <rocm-version> <amdgpu-version>}
AMDGPU_VERSION=${2:?usage: rocm_setup_run.sh <rocm-version> <amdgpu-version>}

# Headers and extra modules for the running kernel are required by amdgpu-dkms.
DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends gpg zstd unzip "linux-headers-$(uname -r)" "linux-modules-extra-$(uname -r)"

# Source: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/install-methods/package-manager/package-manager-ubuntu.html
mkdir --parents --mode=0755 /etc/apt/keyrings
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
    gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null

# Quoted so that `[arch=... signed-by=...]` can never be treated as a glob
# pattern and the versions are not subject to word splitting.
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION noble main" | tee /etc/apt/sources.list.d/rocm.list
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu noble main" | tee /etc/apt/sources.list.d/amdgpu.list

# Prefer packages from repo.radeon.com over same-named distribution packages.
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' \
    | tee /etc/apt/preferences.d/rocm-pin-600

DEBIAN_FRONTEND=noninteractive apt update -y
# rocm-smi-lib shouldn't be necessary, but somehow ptx tests started linking to it.
# Result of Rust 1.90 linker change?
DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends amdgpu-dkms hip-runtime-amd rocm-smi-lib

# Make the ROCm tools and libraries discoverable. Single quotes keep $PATH
# literal so it is expanded when the profile script is sourced, not here.
echo 'export PATH="$PATH:/opt/rocm/bin"' | tee /etc/profile.d/rocm.sh
echo "/opt/rocm/lib" | tee /etc/ld.so.conf.d/rocm.conf
ldconfig

# Grant access to GPUs to all users via udev rules
cat <<'EOF' > /etc/udev/rules.d/70-amdgpu.rules
KERNEL=="kfd", MODE="0666"
SUBSYSTEM=="drm", KERNEL=="renderD*", MODE="0666"
EOF
udevadm control --reload-rules && udevadm trigger
modprobe amdgpu
================================================
FILE: .github/workflows/trigger_nightly_tests.yml
================================================
# Scheduled entry point for the nightly tests: runs nightly_tests.yml once a
# day, but only if the current commit differs from the commit of the most
# recent completed nightly run.
name: Trigger nightly tests
on:
schedule:
- cron: "0 8 * * *"
jobs:
# Looks up the latest completed run of nightly_tests.yml through the GitHub
# REST API and exposes its head commit as the `last_sha` output.
check_last_nightly_run:
runs-on: 'ubuntu-latest'
outputs:
last_sha: ${{ fromJson(steps.check_last_run.outputs.data).workflow_runs[0].head_sha }}
steps:
- uses: octokit/request-action@v2.4.0
id: check_last_run
with:
route: GET /repos/${{github.repository}}/actions/workflows/nightly_tests.yml/runs?per_page=1&status=completed
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Log the commit that was found, for debugging skipped runs.
- run: "echo Last nightly build: ${{ fromJson(steps.check_last_run.outputs.data).workflow_runs[0].head_sha }}"
# Calls the reusable nightly workflow unless that commit was already tested.
build:
needs: [check_last_nightly_run]
if: needs.check_last_nightly_run.outputs.last_sha != github.sha
uses: ./.github/workflows/nightly_tests.yml
secrets: inherit
================================================
FILE: .gitignore
================================================
target/
Cargo.lock
.vscode/
.idea/
ptx/lib/zluda_ptx_impl.ll
================================================
FILE: .gitmodules
================================================
[submodule "ext/llvm-project"]
path = ext/llvm-project
url = https://github.com/vosen/llvm-project.git
branch = main
shallow = true
[submodule "ext/HiGHS"]
path = ext/HiGHS
url = https://github.com/ERGO-Code/HiGHS.git
shallow = true
================================================
FILE: .rustfmt.toml
================================================
newline_style = "Unix"
================================================
FILE: Cargo.toml
================================================
[workspace]
resolver = "2"
members = [
"cuda_check",
"cuda_macros",
"cuda_types",
"dark_api",
"detours-sys",
"ext/highs-sys",
"ext/hip_runtime-sys",
"ext/hipblaslt-sys",
"ext/miopen-sys",
"ext/rocblas-sys",
"format",
"ptx",
"ptx_parser",
"ptx_parser_macros",
"ptx_parser_macros_impl",
"ptxas",
"xtask",
"zluda",
"zluda_bindgen",
"zluda_blas",
"zluda_blaslt",
"zluda_cache",
"zluda_common",
"zluda_dnn",
"zluda_dnn8",
"zluda_dnn9",
"zluda_trace",
"zluda_trace_blas",
"zluda_trace_blaslt",
"zluda_trace_common",
"zluda_trace_dnn8",
"zluda_trace_dnn9",
"zluda_trace_fft",
"zluda_trace_nvml",
"zluda_trace_sparse",
"zluda_fft",
"zluda_inject",
"zluda_ld",
"zluda_ml",
"zluda_precompile",
"zluda_redirect",
"zluda_sparse",
"compiler",
]
default-members = ["zluda", "zluda_ml", "zluda_inject", "zluda_redirect", "compiler"]
[profile.release-lto]
inherits = "release"
codegen-units = 1
lto = true
# By default (even in dev) we build LLVM in Release (opt-level is controlled
# by cmake). That's because LLVM in Debug is excruciatingly slow and makes any
# kind of debugging impossible. This profile is a special configuration for when
# you want to build LLVM in Debug
[profile.dev-llvm]
inherits = "dev"
[profile.dev-llvm.package.xtask]
opt-level = 2
[profile.dev.package.xtask]
opt-level = 2
[patch.crates-io]
highs-sys = { path = "ext/highs-sys" }
================================================
FILE: LICENSE-APACHE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
================================================
FILE: LICENSE-MIT
================================================
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
================================================
FILE: README.md
================================================
ZLUDA is a drop-in replacement for CUDA on non-NVIDIA GPUs. ZLUDA allows running unmodified CUDA applications using non-NVIDIA GPUs with near-native performance.
<div align="center">
<!-- 80x28 104.75x28 62x28-->
[<img src="https://img.shields.io/badge/quick start-green?style=for-the-badge&logo=readthedocs&logoColor=white" width="267.5" height="56">](https://zluda.readthedocs.io) [<img src="https://img.shields.io/badge/Discord-%235865F2.svg?style=for-the-badge&logo=discord&logoColor=white" width="209.5" height="56">](https://discord.gg/sg6BNzXuc7) [<img src="https://img.shields.io/badge/news-red?style=for-the-badge&logo=book&logoColor=white" width="124" height="56">](https://vosen.github.io/ZLUDA/)
</div>
================================================
FILE: compiler/Cargo.toml
================================================
[package]
name = "compiler"
description = "ZLUDA offline compiler"
version = "0.0.0"
authors = ["Joëlle van Essen <joelle@v-essen.nl>"]
edition = "2021"
[[bin]]
name = "zoc"
path = "src/main.rs"
[dependencies]
bpaf = { version = "0.9.19", features = ["derive", "bright-color"] }
llvm_zluda = { path = "../llvm_zluda" }
ptx = { path = "../ptx" }
ptx_parser = { path = "../ptx_parser" }
libloading = "0.8"
thiserror = "2.0.12"
[package.metadata.zluda]
debug_only = true
================================================
FILE: compiler/src/error.rs
================================================
use ptx::TranslateError;
use ptx_parser::PtxError;
use std::ffi::FromBytesUntilNulError;
use std::io;
use std::str::Utf8Error;
/// Every failure the offline compiler can report.
///
/// Most variants wrap a library error unchanged (`#[error(transparent)]` with
/// a `#[from]` conversion), so `?` can be used directly on those error types.
#[derive(Debug, thiserror::Error)]
pub enum CompilerError {
/// A HIP runtime call returned a non-zero status; the payload is that raw code.
#[error("HIP error code: {0:?}")]
HipError(u32),
/// Loading a dynamic library or resolving one of its symbols failed.
#[error(transparent)]
Libloading(#[from] libloading::Error),
/// A filesystem or other I/O operation failed.
#[error(transparent)]
IoError(#[from] io::Error),
/// Bytes that were expected to be UTF-8 text were not valid UTF-8.
#[error(transparent)]
Utf8Error(#[from] Utf8Error),
/// A byte buffer that was expected to hold a C string contained no NUL byte.
#[error(transparent)]
FromBytesUntilNulError(#[from] FromBytesUntilNulError),
/// Catch-all carrying a ready-made message and, optionally, the error that
/// caused it (exposed through `Error::source`).
#[error("{message}")]
GenericError {
#[source]
cause: Option<Box<dyn std::error::Error>>,
message: String,
},
}
/// Collapses a list of PTX parser errors into a single `GenericError`.
///
/// Each error becomes one line of the form `PtxError::<variant>: <detail>`;
/// the lines are joined with `\n`. No `cause` is attached.
impl From<Vec<PtxError<'_>>> for CompilerError {
    fn from(causes: Vec<PtxError>) -> Self {
        let mut lines: Vec<String> = Vec::with_capacity(causes.len());
        for err in causes.iter() {
            // For unrecognized statements/directives only the payload is
            // printed; every other variant uses its own Display output.
            let detail = match err {
                PtxError::UnrecognizedStatement(text)
                | PtxError::UnrecognizedDirective(text) => text.to_string(),
                other => other.to_string(),
            };
            lines.push(format!("PtxError::{}: {}", err.as_ref(), detail));
        }
        CompilerError::GenericError {
            cause: None,
            message: lines.join("\n"),
        }
    }
}
/// Wraps a PTX translation error in a `GenericError`, keeping the original
/// error as the `cause` and naming its variant in the message.
impl From<TranslateError> for CompilerError {
    fn from(cause: TranslateError) -> Self {
        // Render the message first: boxing below moves `cause`.
        let message = format!("PTX TranslateError::{}", cause.as_ref());
        let boxed: Box<dyn std::error::Error> = Box::new(cause);
        CompilerError::GenericError {
            cause: Some(boxed),
            message,
        }
    }
}
impl From<String> for CompilerError {
fn from(message: String) -> Self {
Self::GenericError {
cause: None,
message,
}
}
}
================================================
FILE: compiler/src/main.rs
================================================
use bpaf::Bpaf;
use error::CompilerError;
use std::ffi::CStr;
use std::fs::{self, File};
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::process::ExitCode;
use std::str;
use std::time::Instant;
use std::{env, mem};
mod error;
/// Architecture used when `--arch` is not given and the GPU cannot be queried.
const DEFAULT_ARCH: &str = "gfx1100";
// Command-line options of the `zoc` offline compiler, parsed through the
// `bpaf` derive (`options().run()` in `main_core`).
// NOTE(review): the `///` comments on the fields are presumably picked up by
// bpaf as `--help` text, so they are left untouched - confirm before rewording.
#[derive(Debug, Clone, Bpaf)]
#[bpaf(options, version)]
pub struct Options {
// When absent, `main_core` writes the outputs next to the input file.
#[bpaf(argument("output-dir"))]
/// Output directory
output_dir: Option<PathBuf>,
// When absent, `main_core` asks the HIP runtime for the architecture of
// device 0 and falls back to `DEFAULT_ARCH` if that fails.
#[bpaf(long("arch"))]
/// Target GPU architecture
arch: Option<String>,
// Selects `parse_module_unchecked` instead of `parse_module_checked`.
#[bpaf(long("ignore-errors"))]
/// Try to ignore errors. This will try and produce output even if there are
/// parsing errors (e.g. an unimplemented instruction)
ignore_errors: bool,
// Positional argument; its file name is also the base name of all outputs.
#[bpaf(positional("filename"))]
/// PTX file
ptx_path: String,
}
/// Process entry point: runs the compiler and maps its result to an exit code,
/// printing the error to stderr on failure.
fn main() -> ExitCode {
    match main_core() {
        Ok(()) => ExitCode::SUCCESS,
        Err(e) => {
            eprintln!("Error: {}", e);
            ExitCode::FAILURE
        }
    }
}
/// Compiles the PTX file named on the command line.
///
/// Steps, in order:
/// 1. work out the output location (`--output-dir`, else the input's directory,
///    else the current directory) and base name (the input's file name);
/// 2. pick the target architecture (`--arch`, else device 0 as reported by HIP,
///    else `DEFAULT_ARCH`);
/// 3. translate the PTX to LLVM IR and write it with the `.ll` extension;
/// 4. run `llvm_zluda::compile`, writing every artifact it hands to the hook
///    next to the `.ll` file under the extension the hook receives.
///
/// Returns the first error encountered. A failure to write an artifact from
/// inside the compile hook panics instead, because the hook cannot return one.
fn main_core() -> Result<(), CompilerError> {
    let opts = options().run();
    let ptx_path = PathBuf::from(&opts.ptx_path);
    let filename_base = ptx_path
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("output");

    let mut output_path = if let Some(dir) = opts.output_dir {
        std::fs::create_dir_all(&dir)?;
        dir
    } else if let Some(parent) = ptx_path.parent() {
        parent.to_path_buf()
    } else {
        env::current_dir()?
    };
    output_path.push(filename_base);

    let arch: String = match opts.arch {
        Some(requested) => requested,
        None => {
            // Best effort: any failure to load or query HIP selects the default.
            let detect = || -> Result<String, CompilerError> {
                let runtime = hip::Runtime::load()?;
                runtime.init()?;
                get_gpu_arch(&runtime)
            };
            detect().unwrap_or_else(|_| DEFAULT_ARCH.to_owned())
        }
    };

    let ptx_bytes = fs::read(&ptx_path)?;
    let ptx_text = str::from_utf8(&ptx_bytes)?;
    let llvm = ptx_to_llvm(opts.ignore_errors, ptx_text)?;

    let ir_path = output_path.with_extension("ll");
    write_to_file(&llvm.llvm_ir, ir_path.as_path())?;

    let compiler_hook = |bytes: &Vec<u8>, extension: String| {
        let artifact_path = output_path.with_extension(extension);
        write_to_file(bytes, &artifact_path).unwrap();
    };

    let mut timer = Instant::now();
    llvm_zluda::compile(
        &llvm.context,
        &arch,
        llvm.main,
        &llvm.linked_bitcode,
        llvm.attributes,
        Some(&compiler_hook),
    )?;
    report_pass_time("compile_bitcode", &mut timer);
    Ok(())
}
/// Parses `ptx` and translates it to LLVM modules.
///
/// With `ignore_errors` the unchecked parser is used; otherwise any parse
/// error aborts with a `CompilerError`. The time of each translation pass is
/// printed through `report_pass_time` (parsing itself is not timed).
///
/// Returns the translated modules together with the textual IR of the main
/// module and a copy of the linked bitcode.
fn ptx_to_llvm(ignore_errors: bool, ptx: &str) -> Result<LLVMArtifacts, CompilerError> {
    let ast = match ignore_errors {
        true => ptx_parser::parse_module_unchecked(ptx),
        false => ptx_parser::parse_module_checked(ptx)?,
    };

    let mut pass_timer = Instant::now();
    let attributes = ptx::Attributes {
        clock_rate: 2124000,
    };
    let module = ptx::to_llvm_module(ast, attributes, |pass| {
        report_pass_time(pass, &mut pass_timer)
    })?;

    // Both copies are taken while `module` is still whole; its fields are
    // moved out only when the result is assembled below.
    let llvm_ir = module.llvm_ir.print_module_to_string().to_bytes().to_vec();
    let linked_bitcode = module.linked_bitcode().to_vec();

    Ok(LLVMArtifacts {
        context: module.context,
        main: module.llvm_ir,
        attributes: module.attributes_ir,
        linked_bitcode,
        llvm_ir,
    })
}
/// Prints how long `pass` took, measured from `start`, then resets `start` to
/// the current instant so the next call measures the following pass.
fn report_pass_time(pass: &str, start: &mut Instant) {
    println!("Pass {:?} took {:?}", pass, start.elapsed());
    *start = Instant::now();
}
/// Everything `ptx_to_llvm` produces and `main_core` hands to
/// `llvm_zluda::compile` or writes to disk.
///
/// Rust drops struct fields in declaration order, so both modules are dropped
/// before `context`.
/// NOTE(review): presumably required because the modules belong to that
/// context - confirm before reordering the fields.
struct LLVMArtifacts {
/// The translated main module (`module.llvm_ir`).
main: llvm_zluda::utils::Module,
/// The attributes module (`module.attributes_ir`).
attributes: llvm_zluda::utils::Module,
/// The LLVM context taken from the translated module (`module.context`).
context: llvm_zluda::utils::Context,
/// Copy of the bytes returned by `module.linked_bitcode()`.
linked_bitcode: Vec<u8>,
/// Textual LLVM IR of the main module, written out as the `.ll` file.
llvm_ir: Vec<u8>,
}
/// Returns the architecture name (`gcnArchName`) that HIP reports for device 0.
///
/// # Errors
/// - the error from `device_get_properties` if the query fails;
/// - `CompilerError::FromBytesUntilNulError` if the name buffer holds no NUL;
/// - `CompilerError::Utf8Error` if the name is not valid UTF-8.
fn get_gpu_arch(runtime: &hip::Runtime) -> Result<String, CompilerError> {
    // The properties struct is filled in by HIP; start from all zeroes.
    let mut dev_props = unsafe { mem::zeroed() };
    runtime.device_get_properties(&mut dev_props, 0)?;

    let raw_name = &dev_props.gcnArchName;
    // SAFETY: `c_char` is a one-byte integer, so the buffer can be viewed as
    // bytes; the slice covers exactly the buffer and lives no longer than it.
    let name_bytes =
        unsafe { std::slice::from_raw_parts(raw_name.as_ptr().cast::<u8>(), raw_name.len()) };
    // Bounded search for the terminator: unlike `CStr::from_ptr`, this can
    // never read past the end of the buffer if the NUL is missing.
    let gcn_arch_name = CStr::from_bytes_until_nul(name_bytes)?;
    Ok(gcn_arch_name.to_str()?.to_string())
}
/// Creates (or truncates) `path`, writes `content` to it and reports the path
/// on stdout.
///
/// # Errors
/// Returns the I/O error if the file cannot be created or written.
fn write_to_file(content: &[u8], path: &Path) -> io::Result<()> {
    let mut file = File::create(path)?;
    file.write_all(content)?;
    file.flush()?;
    // `display()` instead of `to_str().unwrap()`: a path that is not valid
    // UTF-8 must not cause a panic after the file was written successfully.
    println!("Wrote to {}", path.display());
    Ok(())
}
mod hip {
use crate::error::CompilerError;
// The HIP runtime is loaded lazily, at run time, so that the compiler still
// works on systems with no HIP driver installed.
/// Handle to the dynamically loaded HIP runtime library.
pub struct Runtime(libloading::Library);

impl Runtime {
    /// Converts a HIP status code into a `Result`: zero is success, anything
    /// else becomes `CompilerError::HipError` carrying the code.
    fn hip_check(err: u32) -> Result<(), CompilerError> {
        if err == 0 {
            Ok(())
        } else {
            Err(CompilerError::HipError(err))
        }
    }

    /// Loads the HIP runtime, trying the version 7 library first and the
    /// version 6 library second. If both fail, the error of the second attempt
    /// is returned.
    pub fn load() -> Result<Self, CompilerError> {
        // Candidate names in the order they are tried.
        #[cfg(windows)]
        const CANDIDATES: [&str; 2] = ["amdhip64_7.dll\0", "amdhip64_6.dll\0"];
        #[cfg(unix)]
        const CANDIDATES: [&str; 2] = ["libamdhip64.so.7\0", "libamdhip64.so.6\0"];

        let [preferred, fallback] = CANDIDATES;
        let library = unsafe {
            match libloading::Library::new(preferred) {
                Ok(lib) => lib,
                Err(_) => libloading::Library::new(fallback)?,
            }
        };
        Ok(Runtime(library))
    }

    /// Calls `hipInit(0)`.
    pub fn init(&self) -> Result<(), CompilerError> {
        type HipInitFn = unsafe extern "C" fn(u32) -> u32;
        unsafe {
            let hip_init: libloading::Symbol<HipInitFn> = self.0.get(b"hipInit\0")?;
            let status = hip_init(0);
            Self::hip_check(status)
        }
    }

    /// Calls `hipGetDevicePropertiesR0600`, filling `prop` with the properties
    /// of `device`.
    pub fn device_get_properties(
        &self,
        prop: &mut hipDeviceProp_tR0600,
        device: i32,
    ) -> Result<(), CompilerError> {
        type GetPropertiesFn = unsafe extern "C" fn(*mut hipDeviceProp_tR0600, i32) -> u32;
        unsafe {
            let hip_get_device_properties: libloading::Symbol<GetPropertiesFn> =
                self.0.get(b"hipGetDevicePropertiesR0600\0")?;
            let status = hip_get_device_properties(prop, device);
            Self::hip_check(status)
        }
    }
}
/// `#[repr(C)]` mirror of the device-properties struct that
/// `Runtime::device_get_properties` passes by pointer to
/// `hipGetDevicePropertiesR0600`.
///
/// The foreign function writes through that pointer, so the field order and
/// field types here define the memory layout it fills in; do not reorder,
/// add or remove fields.
#[allow(non_snake_case, non_camel_case_types)]
#[repr(C)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct hipDeviceProp_tR0600 {
///< Device name.
pub name: [::core::ffi::c_char; 256usize],
///< UUID of a device
pub uuid: hipUUID,
///< 8-byte unique identifier. Only valid on windows
pub luid: [::core::ffi::c_char; 8usize],
///< LUID node mask
pub luidDeviceNodeMask: ::core::ffi::c_uint,
///< Size of global memory region (in bytes).
pub totalGlobalMem: usize,
///< Size of shared memory per block (in bytes).
pub sharedMemPerBlock: usize,
///< Registers per block.
pub regsPerBlock: ::core::ffi::c_int,
///< Warp size.
pub warpSize: ::core::ffi::c_int,
/**< Maximum pitch in bytes allowed by memory copies
< pitched memory*/
pub memPitch: usize,
///< Max work items per work group or workgroup max size.
pub maxThreadsPerBlock: ::core::ffi::c_int,
///< Max number of threads in each dimension (XYZ) of a block.
pub maxThreadsDim: [::core::ffi::c_int; 3usize],
///< Max grid dimensions (XYZ).
pub maxGridSize: [::core::ffi::c_int; 3usize],
///< Max clock frequency of the multiProcessors in khz.
pub clockRate: ::core::ffi::c_int,
/**< Size of shared constant memory region on the device
< (in bytes).*/
pub totalConstMem: usize,
/**< Major compute capability. On HCC, this is an approximation and features may
< differ from CUDA CC. See the arch feature flags for portable ways to query
< feature caps.*/
pub major: ::core::ffi::c_int,
/**< Minor compute capability. On HCC, this is an approximation and features may
< differ from CUDA CC. See the arch feature flags for portable ways to query
< feature caps.*/
pub minor: ::core::ffi::c_int,
///< Alignment requirement for textures
pub textureAlignment: usize,
///< Pitch alignment requirement for texture references bound to
pub texturePitchAlignment: usize,
///< Deprecated. Use asyncEngineCount instead
pub deviceOverlap: ::core::ffi::c_int,
///< Number of multi-processors (compute units).
pub multiProcessorCount: ::core::ffi::c_int,
///< Run time limit for kernels executed on the device
pub kernelExecTimeoutEnabled: ::core::ffi::c_int,
///< APU vs dGPU
pub integrated: ::core::ffi::c_int,
///< Check whether HIP can map host memory
pub canMapHostMemory: ::core::ffi::c_int,
///< Compute mode.
pub computeMode: ::core::ffi::c_int,
///< Maximum number of elements in 1D images
pub maxTexture1D: ::core::ffi::c_int,
///< Maximum 1D mipmap texture size
pub maxTexture1DMipmap: ::core::ffi::c_int,
///< Maximum size for 1D textures bound to linear memory
pub maxTexture1DLinear: ::core::ffi::c_int,
///< Maximum dimensions (width, height) of 2D images, in image elements
pub maxTexture2D: [::core::ffi::c_int; 2usize],
///< Maximum number of elements in 2D array mipmap of images
pub maxTexture2DMipmap: [::core::ffi::c_int; 2usize],
///< Maximum 2D tex dimensions if tex are bound to pitched memory
pub maxTexture2DLinear: [::core::ffi::c_int; 3usize],
///< Maximum 2D tex dimensions if gather has to be performed
pub maxTexture2DGather: [::core::ffi::c_int; 2usize],
/**< Maximum dimensions (width, height, depth) of 3D images, in image
< elements*/
pub maxTexture3D: [::core::ffi::c_int; 3usize],
///< Maximum alternate 3D texture dims
pub maxTexture3DAlt: [::core::ffi::c_int; 3usize],
///< Maximum cubemap texture dims
pub maxTextureCubemap: ::core::ffi::c_int,
///< Maximum number of elements in 1D array images
pub maxTexture1DLayered: [::core::ffi::c_int; 2usize],
///< Maximum number of elements in 2D array images
pub maxTexture2DLayered: [::core::ffi::c_int; 3usize],
///< Maximum cubemaps layered texture dims
pub maxTextureCubemapLayered: [::core::ffi::c_int; 2usize],
///< Maximum 1D surface size
pub maxSurface1D: ::core::ffi::c_int,
///< Maximum 2D surface size
pub maxSurface2D: [::core::ffi::c_int; 2usize],
///< Maximum 3D surface size
pub maxSurface3D: [::core::ffi::c_int; 3usize],
///< Maximum 1D layered surface size
pub maxSurface1DLayered: [::core::ffi::c_int; 2usize],
///< Maximum 2D layered surface size
pub maxSurface2DLayered: [::core::ffi::c_int; 3usize],
///< Maximum cubemap surface size
pub maxSurfaceCubemap: ::core::ffi::c_int,
///< Maximum cubemap layered surface size
pub maxSurfaceCubemapLayered: [::core::ffi::c_int; 2usize],
///< Alignment requirement for surface
pub surfaceAlignment: usize,
///< Device can possibly execute multiple kernels concurrently.
pub concurrentKernels: ::core::ffi::c_int,
///< Device has ECC support enabled
pub ECCEnabled: ::core::ffi::c_int,
///< PCI Bus ID.
pub pciBusID: ::core::ffi::c_int,
///< PCI Device ID.
pub pciDeviceID: ::core::ffi::c_int,
///< PCI Domain ID
pub pciDomainID: ::core::ffi::c_int,
///< 1:If device is Tesla device using TCC driver, else 0
pub tccDriver: ::core::ffi::c_int,
///< Number of async engines
pub asyncEngineCount: ::core::ffi::c_int,
///< Does device and host share unified address space
pub unifiedAddressing: ::core::ffi::c_int,
///< Max global memory clock frequency in khz.
pub memoryClockRate: ::core::ffi::c_int,
///< Global memory bus width in bits.
pub memoryBusWidth: ::core::ffi::c_int,
///< L2 cache size.
pub l2CacheSize: ::core::ffi::c_int,
///< Device's max L2 persisting lines in bytes
pub persistingL2CacheMaxSize: ::core::ffi::c_int,
///< Maximum resident threads per multi-processor.
pub maxThreadsPerMultiProcessor: ::core::ffi::c_int,
///< Device supports stream priority
pub streamPrioritiesSupported: ::core::ffi::c_int,
///< Indicates globals are cached in L1
pub globalL1CacheSupported: ::core::ffi::c_int,
///< Locals are cached in L1
pub localL1CacheSupported: ::core::ffi::c_int,
///< Amount of shared memory available per multiprocessor.
pub sharedMemPerMultiprocessor: usize,
///< registers available per multiprocessor
pub regsPerMultiprocessor: ::core::ffi::c_int,
///< Device supports allocating managed memory on this system
pub managedMemory: ::core::ffi::c_int,
///< 1 if device is on a multi-GPU board, 0 if not.
pub isMultiGpuBoard: ::core::ffi::c_int,
///< Unique identifier for a group of devices on same multiboard GPU
pub multiGpuBoardGroupID: ::core::ffi::c_int,
///< Link between host and device supports native atomics
pub hostNativeAtomicSupported: ::core::ffi::c_int,
///< Deprecated. CUDA only.
pub singleToDoublePrecisionPerfRatio: ::core::ffi::c_int,
/**< Device supports coherently accessing pageable memory
< without calling hipHostRegister on it*/
pub pageableMemoryAccess: ::core::ffi::c_int,
/**< Device can coherently access managed memory concurrently with
< the CPU*/
pub concurrentManagedAccess: ::core::ffi::c_int,
///< Is compute preemption supported on the device
pub computePreemptionSupported: ::core::ffi::c_int,
/**< Device can access host registered memory with same
< address as the host*/
pub canUseHostPointerForRegisteredMem: ::core::ffi::c_int,
///< HIP device supports cooperative launch
pub cooperativeLaunch: ::core::ffi::c_int,
/**< HIP device supports cooperative launch on multiple
< devices*/
pub cooperativeMultiDeviceLaunch: ::core::ffi::c_int,
///< Per device max shared mem per block usable by special opt in
pub sharedMemPerBlockOptin: usize,
/**< Device accesses pageable memory via the host's
< page tables*/
pub pageableMemoryAccessUsesHostPageTables: ::core::ffi::c_int,
/**< Host can directly access managed memory on the device
< without migration*/
pub directManagedMemAccessFromHost: ::core::ffi::c_int,
///< Max number of blocks on CU
pub maxBlocksPerMultiProcessor: ::core::ffi::c_int,
///< Max value of access policy window
pub accessPolicyMaxWindowSize: ::core::ffi::c_int,
///< Shared memory reserved by driver per block
pub reservedSharedMemPerBlock: usize,
///< Device supports hipHostRegister
pub hostRegisterSupported: ::core::ffi::c_int,
///< Indicates if device supports sparse hip arrays
pub sparseHipArraySupported: ::core::ffi::c_int,
/**< Device supports using the hipHostRegisterReadOnly flag
< with hipHostRegister*/
pub hostRegisterReadOnlySupported: ::core::ffi::c_int,
///< Indicates external timeline semaphore support
pub timelineSemaphoreInteropSupported: ::core::ffi::c_int,
///< Indicates if device supports hipMallocAsync and hipMemPool APIs
pub memoryPoolsSupported: ::core::ffi::c_int,
///< Indicates device support of RDMA APIs
pub gpuDirectRDMASupported: ::core::ffi::c_int,
/**< Bitmask to be interpreted according to
< hipFlushGPUDirectRDMAWritesOptions*/
pub gpuDirectRDMAFlushWritesOptions: ::core::ffi::c_uint,
///< value of hipGPUDirectRDMAWritesOrdering
pub gpuDirectRDMAWritesOrdering: ::core::ffi::c_int,
///< Bitmask of handle types support with mempool based IPC
pub memoryPoolSupportedHandleTypes: ::core::ffi::c_uint,
/**< Device supports deferred mapping HIP arrays and HIP
< mipmapped arrays*/
pub deferredMappingHipArraySupported: ::core::ffi::c_int,
///< Device supports IPC events
pub ipcEventSupported: ::core::ffi::c_int,
///< Device supports cluster launch
pub clusterLaunch: ::core::ffi::c_int,
///< Indicates device supports unified function pointers
pub unifiedFunctionPointers: ::core::ffi::c_int,
///< CUDA Reserved.
pub reserved: [::core::ffi::c_int; 63usize],
///< Reserved for adding new entries for HIP/CUDA.
pub hipReserved: [::core::ffi::c_int; 32usize],
///< AMD GCN Arch Name. HIP Only.
pub gcnArchName: [::core::ffi::c_char; 256usize],
///< Maximum Shared Memory Per CU. HIP Only.
pub maxSharedMemoryPerMultiProcessor: usize,
/**< Frequency in khz of the timer used by the device-side "clock*"
< instructions. New for HIP.*/
pub clockInstructionRate: ::core::ffi::c_int,
///< Architectural feature flags. New for HIP.
pub arch: hipDeviceArch_t,
///< Address of HDP_MEM_COHERENCY_FLUSH_CNTL register
pub hdpMemFlushCntl: *mut ::core::ffi::c_uint,
///< Address of HDP_REG_COHERENCY_FLUSH_CNTL register
pub hdpRegFlushCntl: *mut ::core::ffi::c_uint,
/**< HIP device supports cooperative launch on
< multiple*/
pub cooperativeMultiDeviceUnmatchedFunc: ::core::ffi::c_int,
/**< HIP device supports cooperative launch on
< multiple*/
pub cooperativeMultiDeviceUnmatchedGridDim: ::core::ffi::c_int,
/**< HIP device supports cooperative launch on
< multiple*/
pub cooperativeMultiDeviceUnmatchedBlockDim: ::core::ffi::c_int,
/**< HIP device supports cooperative launch on
< multiple*/
pub cooperativeMultiDeviceUnmatchedSharedMem: ::core::ffi::c_int,
///< 1: if it is a large PCI bar device, else 0
pub isLargeBar: ::core::ffi::c_int,
///< Revision of the GPU in this device
pub asicRevision: ::core::ffi::c_int,
}
/// Architectural feature flags, embedded in `hipDeviceProp_tR0600::arch`.
///
/// The flags are stored as an opaque 3-byte bitfield followed by one padding
/// byte, giving a 4-byte, 4-byte-aligned value. No accessors for individual
/// flags are defined here.
#[allow(non_snake_case, non_camel_case_types)]
#[repr(C)]
#[repr(align(4))]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct hipDeviceArch_t {
// Zero-sized marker field; it occupies no space.
pub _bitfield_align_1: [u8; 0],
// Raw storage for the feature-flag bits.
pub _bitfield_1: __BindgenBitfieldUnit<[u8; 3usize]>,
// Explicit padding that rounds the struct up to 4 bytes.
pub __bindgen_padding_0: u8,
}
/// Opaque backing storage for a C bitfield.
///
/// Only the raw `Storage` is kept; this module defines no methods for reading
/// or writing individual bits.
/// NOTE(review): the name suggests this was copied from bindgen output —
/// confirm before editing by hand.
#[repr(C)]
#[derive(Copy, Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct __BindgenBitfieldUnit<Storage> {
storage: Storage,
}
/// 16-byte device UUID, stored as raw C characters.
#[allow(non_camel_case_types)]
#[repr(C)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct hipUUID_t {
pub bytes: [::core::ffi::c_char; 16usize],
}
/// Alias for `hipUUID_t`; this is the name used by `hipDeviceProp_tR0600::uuid`.
#[allow(non_camel_case_types)]
pub type hipUUID = hipUUID_t;
}
================================================
FILE: cuda_check/Cargo.toml
================================================
[package]
name = "cuda_check"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2021"
[[bin]]
name = "cuda_check"
path = "src/main.rs"
[dependencies]
cuda_types = { path = "../cuda_types" }
zluda_windows = { path = "../zluda_windows" }
bpaf = { version = "0.9.19", features = ["derive", "bright-color"] }
owo-colors = { version = "4", features = ["supports-colors"] }
rand = "0.9.2"
libloading = "0.8"
[target.'cfg(windows)'.dependencies]
windows = { version = "0.62.2", features = ["Win32_Foundation"] }
[package.metadata.zluda]
windows_only = true
================================================
FILE: cuda_check/src/main.rs
================================================
// The checker itself lives in the `win` module, which is only compiled on
// Windows.
#[cfg(windows)]
mod win;
// Windows entry point: forwards to `win::main`.
#[cfg(windows)]
fn main() {
win::main()
}
// On every other platform the binary is an empty program that does nothing.
#[cfg(not(windows))]
fn main() {}
================================================
FILE: cuda_check/src/win.rs
================================================
use bpaf::{construct, pure, Bpaf, Parser};
use owo_colors::{OwoColorize, Stream};
use rand::seq::SliceRandom;
use std::{ffi::OsString, mem};
use windows::Win32::Foundation::HMODULE;
use zluda_windows::{get_module_path_utf16, LibraryInfo};
// Command-line options of the checker.
// NOTE: plain `//` comments are used here on purpose. bpaf's derive macro
// turns `///` doc comments into user-visible help text, so adding doc
// comments would change the program's `--help` output.
#[derive(Debug, Bpaf)]
#[bpaf(options)]
struct Options {
// Libraries selected on the command line, parsed by the external
// `libraries()` parser. When empty, `main` checks every known library.
#[bpaf(external)]
libraries: Vec<&'static LibraryInfo>,
// When set, `main` leaves the first selected library in place and shuffles
// only the remaining ones; otherwise the whole list is shuffled.
#[bpaf(switch)]
driver_first: bool,
}
/// Builds the parser for the per-library selection switches.
///
/// Every entry of `zluda_windows::LIBRARIES` contributes one long switch named
/// after its `short_name`. The parser yields the entries whose switch was
/// given, in `LIBRARIES` order.
fn libraries() -> impl Parser<Vec<&'static LibraryInfo>> {
    let known = zluda_windows::LIBRARIES.len();
    // Start from a parser that yields an empty list, then chain one switch
    // per known library onto it.
    let mut parser = Parser::boxed(pure(Vec::with_capacity(known)));
    for library in zluda_windows::LIBRARIES.iter() {
        let dlls = library.ascii_name;
        let arg = bpaf::long(library.short_name)
            .help(&*format!("Look for {dlls}"))
            .switch();
        parser = construct!(parser, arg)
            .map(move |(mut selected, present)| {
                if present {
                    selected.push(library);
                }
                selected
            })
            .boxed();
    }
    parser
}
pub fn main() {
let mut opts = options().run();
if opts.libraries.is_empty() {
opts.libraries = zluda_windows::LIBRARIES.iter().collect();
}
let mut lib_set = opts.libraries;
if !opts.driver_first {
lib_set.shuffle(&mut rand::rng());
} else {
let (_, remainder) = lib_set.split_first_mut().unwrap();
remainder.shuffle(&mut rand::rng());
}
for lib in lib_set {
print_result(lib.short_name, unsafe { try_load_library(lib) });
}
}
/// Prints one result line for a library check.
///
/// The line starts with `short_name` left-aligned in a 10-character column.
/// It is followed by a green `OK` (with the path in parentheses when one is
/// available) or by a red `ERROR: ...` message. Colours are applied only when
/// stdout supports them.
fn print_result(short_name: &str, lib: Result<Option<OsString>, Error>) {
    print!("{:<10}: ", short_name);
    let hip_path = match lib {
        Ok(hip_path) => hip_path,
        Err(err) => {
            println!(
                "{}",
                format!("ERROR: {:?}", err).if_supports_color(Stream::Stdout, |text| text.red())
            );
            return;
        }
    };
    if let Some(path) = hip_path {
        println!(
            "{} ({})",
            "OK".if_supports_color(Stream::Stdout, |text| text.green()),
            path.display()
        );
    } else {
        println!(
            "{}",
            "OK".if_supports_color(Stream::Stdout, |text| text.green())
        );
    }
}
unsafe fn try_load_library(lib: &LibraryInfo) -> Result<Option<OsString>, Error> {
let library = if lib.in_system32 {
libloading::Library::new(lib.ascii_name)?
} else {
match std::env::var("CUDA_PATH") {
Ok(cuda_path) => {
let path = std::path::Path::new(&cuda_path)
.join("bin")
.join("x64")
.join(lib.ascii_name);
libloading::Library::new(path)?
}
Err(_) => libloading::Library::new(lib.ascii_name)?,
}
};
match lib.short_name {
"nvcuda" => check_cuda(library),
"nvml" => check_nvml(library),
"cudnn8" => check_cudnn8(library),
"cudnn9" => check_cudnn9(library),
"cublas11" => check_cublas(library),
"cublas12" => check_cublas(library),
"cublas13" => check_cublas(library),
"cublaslt11" => check_cublaslt(library),
"cublaslt12" => check_cublaslt(library),
"cublaslt13" => check_cublaslt(library),
"cusparse10" => check_cusparse(library),
"cusparse11" => check_cusparse(library),
"cusparse12" => check_cusparse(library),
"cufft10" => check_cufft(library),
"cufft11" => check_cufft(library),
"cufft12" => check_cufft(library),
_ => Err(Error::Initialization(
format!("Library check not implemented for {}", lib.short_name),
0,
)),
}
}
/// Returns the on-disk path of a module that is already loaded into this
/// process, or `None` when no module with that name is loaded.
unsafe fn path_for_loaded_lib(lib: &'static str) -> Option<OsString> {
    use libloading::os::windows::Library;
    let raw_handle = match Library::open_already_loaded(lib) {
        Ok(loaded) => loaded.into_raw(),
        Err(_) => return None,
    };
    let module_path = get_module_path_utf16(HMODULE(raw_handle as _));
    // Re-wrap the raw handle and drop it straight away so the handle taken
    // out with `into_raw` is handed back to libloading instead of leaking.
    drop(Library::from_raw(raw_handle));
    Some(module_path)
}
unsafe fn check_cufft(library: libloading::Library) -> Result<Option<OsString>, Error> {
let hip_path = || path_for_loaded_lib("hipfft.dll");
let cufft_create = library.get::<extern "system" fn(
handle: *mut cuda_types::cufft::cufftHandle,
) -> cuda_types::cufft::cufftResult>(b"cufftCreate\0")?;
let cufft_destroy = library.get::<extern "system" fn(
handle: cuda_types::cufft::cufftHandle,
) -> cuda_types::cufft::cufftResult>(b"cufftDestroy\0")?;
let mut handle = mem::zeroed();
match cufft_create(&mut handle) {
Ok(()) => {}
Err(cuda_types::cufft::cufftError_t::NOT_SUPPORTED) => {
return Ok(hip_path());
}
Err(err) => {
return Err(Error::Initialization(
"cufftCreate".to_string(),
err.0.get() as usize,
));
}
}
let result = hip_path();
cufft_destroy(handle)
.map_err(|err| Error::Initialization("cufftDestroy".to_string(), err.0.get() as usize))?;
Ok(result)
}
unsafe fn check_cublas(library: libloading::Library) -> Result<Option<OsString>, Error> {
let hip_path = || path_for_loaded_lib("rocblas.dll");
let cublas_create = library.get::<extern "system" fn(
handle: *mut cuda_types::cublas::cublasHandle_t,
) -> cuda_types::cublas::cublasStatus_t>(b"cublasCreate_v2\0")?;
let cublas_destroy =
library.get::<extern "system" fn(
handle: cuda_types::cublas::cublasHandle_t,
) -> cuda_types::cublas::cublasStatus_t>(b"cublasDestroy_v2\0")?;
let mut handle = mem::zeroed();
cublas_create(&mut handle).map_err(|err| {
Error::Initialization("cublasCreate_v2".to_string(), err.0.get() as usize)
})?;
let result = hip_path();
cublas_destroy(handle).map_err(|err| {
Error::Initialization("cublasDestroy_v2".to_string(), err.0.get() as usize)
})?;
Ok(result)
}
unsafe fn check_cusparse(library: libloading::Library) -> Result<Option<OsString>, Error> {
let hip_path = || path_for_loaded_lib("rocsparse.dll");
let cusparse_create =
library.get::<extern "system" fn(
handle: *mut cuda_types::cusparse::cusparseHandle_t,
) -> cuda_types::cusparse::cusparseStatus_t>(b"cusparseCreate\0")?;
let cusparse_destroy =
library.get::<extern "system" fn(
handle: cuda_types::cusparse::cusparseHandle_t,
) -> cuda_types::cusparse::cusparseStatus_t>(b"cusparseDestroy\0")?;
let mut handle = mem::zeroed();
match cusparse_create(&mut handle) {
Ok(()) => {}
Err(cuda_types::cusparse::cusparseError_t::NOT_SUPPORTED) => {
return Ok(hip_path());
}
Err(err) => {
return Err(Error::Initialization(
"cusparseCreate".to_string(),
err.0.get() as usize,
));
}
}
let result = hip_path();
cusparse_destroy(handle).map_err(|err| {
Error::Initialization("cusparseDestroy".to_string(), err.0.get() as usize)
})?;
Ok(result)
}
unsafe fn check_cublaslt(library: libloading::Library) -> Result<Option<OsString>, Error> {
let hip_path =
|| path_for_loaded_lib("hipblaslt.dll").or_else(|| path_for_loaded_lib("libhipblaslt.dll"));
let cublaslt_create =
library.get::<extern "system" fn(
handle: *mut cuda_types::cublaslt::cublasLtHandle_t,
) -> cuda_types::cublas::cublasStatus_t>(b"cublasLtCreate\0")?;
let cublaslt_destroy =
library.get::<extern "system" fn(
handle: cuda_types::cublaslt::cublasLtHandle_t,
) -> cuda_types::cublas::cublasStatus_t>(b"cublasLtDestroy\0")?;
let mut handle = mem::zeroed();
cublaslt_create(&mut handle)
.map_err(|err| Error::Initialization("cublasLtCreate".to_string(), err.0.get() as usize))?;
let result = hip_path();
cublaslt_destroy(handle).map_err(|err| {
Error::Initialization("cublasLtDestroy".to_string(), err.0.get() as usize)
})?;
Ok(result)
}
unsafe fn check_cuda(library: libloading::Library) -> Result<Option<OsString>, Error> {
let cu_init = library
.get::<extern "system" fn(::core::ffi::c_uint) -> cuda_types::cuda::CUresult>(
b"cuInit\0",
)?;
cu_init(0).map_err(|err| Error::Initialization("cuInit".to_string(), err.0.get() as usize))?;
Ok(path_for_loaded_lib("amdhip64_7.dll").or_else(|| path_for_loaded_lib("amdhip64_6.dll")))
}
/// Checks the NVML DLL by calling `nvmlInit_v2`.
///
/// Both success and `ERROR_NOT_SUPPORTED` count as a pass and yield the path
/// of the loaded `rocm_smi64.dll` (if any). Every other error is reported as
/// `Error::Initialization`.
unsafe fn check_nvml(library: libloading::Library) -> Result<Option<OsString>, Error> {
    use cuda_types::nvml::{nvmlReturn_t, nvmlReturn_tConsts};
    type NvmlInit = extern "system" fn() -> nvmlReturn_t;
    let nvml_init = library.get::<NvmlInit>(b"nvmlInit_v2\0")?;
    let status = nvml_init();
    match status {
        nvmlReturn_t::ERROR_NOT_SUPPORTED | Ok(()) => Ok(path_for_loaded_lib("rocm_smi64.dll")),
        Err(err) => Err(Error::Initialization(
            "nvmlInit_v2".to_string(),
            err.0.get() as usize,
        )),
    }
}
unsafe fn check_cudnn8(library: libloading::Library) -> Result<Option<OsString>, Error> {
let hip_path = || path_for_loaded_lib("MIOpen.dll");
let cudnn_create = library.get::<extern "system" fn(
handle: *mut cuda_types::cudnn8::cudnnHandle_t,
) -> cuda_types::cudnn8::cudnnStatus_t>(b"cudnnCreate\0")?;
let cudnn_destroy = library.get::<extern "system" fn(
handle: cuda_types::cudnn8::cudnnHandle_t,
) -> cuda_types::cudnn8::cudnnStatus_t>(b"cudnnDestroy\0")?;
let mut handle = mem::zeroed();
cudnn_create(&mut handle)
.map_err(|err| Error::Initialization("cudnnCreate".to_string(), err.0.get() as usize))?;
let result = hip_path();
cudnn_destroy(handle)
.map_err(|err| Error::Initialization("cudnnDestroy".to_string(), err.0.get() as usize))?;
Ok(result)
}
unsafe fn check_cudnn9(library: libloading::Library) -> Result<Option<OsString>, Error> {
let hip_path = || path_for_loaded_lib("MIOpen.dll");
let cudnn_create = library.get::<extern "system" fn(
handle: *mut cuda_types::cudnn9::cudnnHandle_t,
) -> cuda_types::cudnn9::cudnnStatus_t>(b"cudnnCreate\0")?;
let cudnn_destroy = library.get::<extern "system" fn(
handle: cuda_types::cudnn9::cudnnHandle_t,
) -> cuda_types::cudnn9::cudnnStatus_t>(b"cudnnDestroy\0")?;
let mut handle = mem::zeroed();
cudnn_create(&mut handle)
.map_err(|err| Error::Initialization("cudnnCreate".to_string(), err.0.get() as usize))?;
let result = hip_path();
cudnn_destroy(handle)
.map_err(|err| Error::Initialization("cudnnDestroy".to_string(), err.0.get() as usize))?;
Ok(result)
}
/// Failure modes of a single library check. Values are only ever shown
/// through their `Debug` output in `print_result`.
#[derive(Debug)]
#[allow(dead_code)]
enum Error {
/// Loading the DLL or resolving one of its symbols failed (converted from
/// `libloading::Error`).
Loading(libloading::Error),
/// A check failed: a message (the name of the failing function, or a
/// "not implemented" note) plus a numeric error code (0 when no code applies).
Initialization(String, usize),
}
impl From<libloading::Error> for Error {
fn from(err: libloading::Error) -> Self {
Error::Loading(err)
}
}
================================================
FILE: cuda_macros/.rustfmt.toml
================================================
disable_all_formatting = true
================================================
FILE: cuda_macros/Cargo.toml
================================================
[package]
name = "cuda_macros"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2021"
[dependencies]
quote = "1.0"
syn = { version = "2.0", features = ["full", "visit-mut", "extra-traits"] }
proc-macro2 = "1.0"
rustc-hash = "2.0.0"
[lib]
proc-macro = true
================================================
FILE: cuda_macros/build/wrapper.h
================================================
#define __CUDA_API_VERSION_INTERNAL
#include <cuda.h>
#include <cudaProfiler.h>
#include <cudaGL.h>
#include <cudaEGL.h>
#include <vdpau/vdpau.h>
#include <cudaVDPAU.h>
================================================
FILE: cuda_macros/src/cublas.rs
================================================
// Generated automatically by zluda_bindgen
// DO NOT EDIT MANUALLY
#![allow(warnings)]
extern "system" {
#[must_use]
fn cublasCreate_v2(
handle: *mut cuda_types::cublas::cublasHandle_t,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasDestroy_v2(
handle: cuda_types::cublas::cublasHandle_t,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasGetVersion_v2(
handle: cuda_types::cublas::cublasHandle_t,
version: *mut ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasGetProperty(
type_: cuda_types::cublas::libraryPropertyType,
value: *mut ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
fn cublasGetCudartVersion() -> usize;
#[must_use]
fn cublasSetWorkspace_v2(
handle: cuda_types::cublas::cublasHandle_t,
workspace: *mut ::core::ffi::c_void,
workspaceSizeInBytes: usize,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasSetStream_v2(
handle: cuda_types::cublas::cublasHandle_t,
streamId: cuda_types::cublas::cudaStream_t,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasGetStream_v2(
handle: cuda_types::cublas::cublasHandle_t,
streamId: *mut cuda_types::cublas::cudaStream_t,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasGetPointerMode_v2(
handle: cuda_types::cublas::cublasHandle_t,
mode: *mut cuda_types::cublas::cublasPointerMode_t,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasSetPointerMode_v2(
handle: cuda_types::cublas::cublasHandle_t,
mode: cuda_types::cublas::cublasPointerMode_t,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasGetAtomicsMode(
handle: cuda_types::cublas::cublasHandle_t,
mode: *mut cuda_types::cublas::cublasAtomicsMode_t,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasSetAtomicsMode(
handle: cuda_types::cublas::cublasHandle_t,
mode: cuda_types::cublas::cublasAtomicsMode_t,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasGetMathMode(
handle: cuda_types::cublas::cublasHandle_t,
mode: *mut cuda_types::cublas::cublasMath_t,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasSetMathMode(
handle: cuda_types::cublas::cublasHandle_t,
mode: cuda_types::cublas::cublasMath_t,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasGetSmCountTarget(
handle: cuda_types::cublas::cublasHandle_t,
smCountTarget: *mut ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasSetSmCountTarget(
handle: cuda_types::cublas::cublasHandle_t,
smCountTarget: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasGetEmulationStrategy(
handle: cuda_types::cublas::cublasHandle_t,
emulationStrategy: *mut cuda_types::cublas::cublasEmulationStrategy_t,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasSetEmulationStrategy(
handle: cuda_types::cublas::cublasHandle_t,
emulationStrategy: cuda_types::cublas::cublasEmulationStrategy_t,
) -> cuda_types::cublas::cublasStatus_t;
fn cublasGetStatusName(
status: cuda_types::cublas::cublasStatus_t,
) -> *const ::core::ffi::c_char;
fn cublasGetStatusString(
status: cuda_types::cublas::cublasStatus_t,
) -> *const ::core::ffi::c_char;
#[must_use]
fn cublasLoggerConfigure(
logIsOn: ::core::ffi::c_int,
logToStdOut: ::core::ffi::c_int,
logToStdErr: ::core::ffi::c_int,
logFileName: *const ::core::ffi::c_char,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasSetLoggerCallback(
userCallback: cuda_types::cublas::cublasLogCallback,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasGetLoggerCallback(
userCallback: *mut cuda_types::cublas::cublasLogCallback,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasSetVector(
n: ::core::ffi::c_int,
elemSize: ::core::ffi::c_int,
x: *const ::core::ffi::c_void,
incx: ::core::ffi::c_int,
devicePtr: *mut ::core::ffi::c_void,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasSetVector_64(
n: i64,
elemSize: i64,
x: *const ::core::ffi::c_void,
incx: i64,
devicePtr: *mut ::core::ffi::c_void,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasGetVector(
n: ::core::ffi::c_int,
elemSize: ::core::ffi::c_int,
x: *const ::core::ffi::c_void,
incx: ::core::ffi::c_int,
y: *mut ::core::ffi::c_void,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasGetVector_64(
n: i64,
elemSize: i64,
x: *const ::core::ffi::c_void,
incx: i64,
y: *mut ::core::ffi::c_void,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasSetMatrix(
rows: ::core::ffi::c_int,
cols: ::core::ffi::c_int,
elemSize: ::core::ffi::c_int,
A: *const ::core::ffi::c_void,
lda: ::core::ffi::c_int,
B: *mut ::core::ffi::c_void,
ldb: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasSetMatrix_64(
rows: i64,
cols: i64,
elemSize: i64,
A: *const ::core::ffi::c_void,
lda: i64,
B: *mut ::core::ffi::c_void,
ldb: i64,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasGetMatrix(
rows: ::core::ffi::c_int,
cols: ::core::ffi::c_int,
elemSize: ::core::ffi::c_int,
A: *const ::core::ffi::c_void,
lda: ::core::ffi::c_int,
B: *mut ::core::ffi::c_void,
ldb: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasGetMatrix_64(
rows: i64,
cols: i64,
elemSize: i64,
A: *const ::core::ffi::c_void,
lda: i64,
B: *mut ::core::ffi::c_void,
ldb: i64,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasSetVectorAsync(
n: ::core::ffi::c_int,
elemSize: ::core::ffi::c_int,
hostPtr: *const ::core::ffi::c_void,
incx: ::core::ffi::c_int,
devicePtr: *mut ::core::ffi::c_void,
incy: ::core::ffi::c_int,
stream: cuda_types::cublas::cudaStream_t,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasSetVectorAsync_64(
n: i64,
elemSize: i64,
hostPtr: *const ::core::ffi::c_void,
incx: i64,
devicePtr: *mut ::core::ffi::c_void,
incy: i64,
stream: cuda_types::cublas::cudaStream_t,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasGetVectorAsync(
n: ::core::ffi::c_int,
elemSize: ::core::ffi::c_int,
devicePtr: *const ::core::ffi::c_void,
incx: ::core::ffi::c_int,
hostPtr: *mut ::core::ffi::c_void,
incy: ::core::ffi::c_int,
stream: cuda_types::cublas::cudaStream_t,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasGetVectorAsync_64(
n: i64,
elemSize: i64,
devicePtr: *const ::core::ffi::c_void,
incx: i64,
hostPtr: *mut ::core::ffi::c_void,
incy: i64,
stream: cuda_types::cublas::cudaStream_t,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasSetMatrixAsync(
rows: ::core::ffi::c_int,
cols: ::core::ffi::c_int,
elemSize: ::core::ffi::c_int,
A: *const ::core::ffi::c_void,
lda: ::core::ffi::c_int,
B: *mut ::core::ffi::c_void,
ldb: ::core::ffi::c_int,
stream: cuda_types::cublas::cudaStream_t,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasSetMatrixAsync_64(
rows: i64,
cols: i64,
elemSize: i64,
A: *const ::core::ffi::c_void,
lda: i64,
B: *mut ::core::ffi::c_void,
ldb: i64,
stream: cuda_types::cublas::cudaStream_t,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasGetMatrixAsync(
rows: ::core::ffi::c_int,
cols: ::core::ffi::c_int,
elemSize: ::core::ffi::c_int,
A: *const ::core::ffi::c_void,
lda: ::core::ffi::c_int,
B: *mut ::core::ffi::c_void,
ldb: ::core::ffi::c_int,
stream: cuda_types::cublas::cudaStream_t,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasGetMatrixAsync_64(
rows: i64,
cols: i64,
elemSize: i64,
A: *const ::core::ffi::c_void,
lda: i64,
B: *mut ::core::ffi::c_void,
ldb: i64,
stream: cuda_types::cublas::cudaStream_t,
) -> cuda_types::cublas::cublasStatus_t;
fn cublasXerbla(srName: *const ::core::ffi::c_char, info: ::core::ffi::c_int) -> ();
#[must_use]
fn cublasNrm2Ex(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: ::core::ffi::c_int,
result: *mut ::core::ffi::c_void,
resultType: cuda_types::cublas::cudaDataType,
executionType: cuda_types::cublas::cudaDataType,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasNrm2Ex_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: i64,
result: *mut ::core::ffi::c_void,
resultType: cuda_types::cublas::cudaDataType,
executionType: cuda_types::cublas::cudaDataType,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasSnrm2_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const f32,
incx: ::core::ffi::c_int,
result: *mut f32,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasSnrm2_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const f32,
incx: i64,
result: *mut f32,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasDnrm2_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const f64,
incx: ::core::ffi::c_int,
result: *mut f64,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasDnrm2_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const f64,
incx: i64,
result: *mut f64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `x` is `*const cuComplex`, `result` is a real `*mut f32`.
// NOTE(review): name suggests BLAS `nrm2` on a single-precision complex vector — confirm against the cuBLAS docs.
#[must_use]
fn cublasScnrm2_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
result: *mut f32,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `x` is `*const cuComplex`, `result` is a real `*mut f32`.
// NOTE(review): name suggests BLAS `nrm2` on a single-precision complex vector, 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasScnrm2_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
result: *mut f32,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `x` is `*const cuDoubleComplex`, `result` is a real `*mut f64`.
// NOTE(review): name suggests BLAS `nrm2` on a double-precision complex vector — confirm against the cuBLAS docs.
#[must_use]
fn cublasDznrm2_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
result: *mut f64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `x` is `*const cuDoubleComplex`, `result` is a real `*mut f64`.
// NOTE(review): name suggests BLAS `nrm2` on a double-precision complex vector, 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasDznrm2_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
result: *mut f64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x`/`y` are `*const c_void`, `result` is `*mut c_void`,
// and `xType`/`yType`/`resultType`/`executionType` are `cudaDataType` values.
// NOTE(review): name suggests the type-generic BLAS `dot` product — confirm against the cuBLAS docs.
#[must_use]
fn cublasDotEx(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: ::core::ffi::c_int,
y: *const ::core::ffi::c_void,
yType: cuda_types::cublas::cudaDataType,
incy: ::core::ffi::c_int,
result: *mut ::core::ffi::c_void,
resultType: cuda_types::cublas::cudaDataType,
executionType: cuda_types::cublas::cudaDataType,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x`/`y` are `*const c_void`, `result` is `*mut c_void`,
// and `xType`/`yType`/`resultType`/`executionType` are `cudaDataType` values.
// NOTE(review): name suggests the 64-bit-index form of type-generic `dot` — confirm against the cuBLAS docs.
#[must_use]
fn cublasDotEx_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: i64,
y: *const ::core::ffi::c_void,
yType: cuda_types::cublas::cudaDataType,
incy: i64,
result: *mut ::core::ffi::c_void,
resultType: cuda_types::cublas::cudaDataType,
executionType: cuda_types::cublas::cudaDataType,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x`/`y` are `*const c_void`, `result` is `*mut c_void`,
// and `xType`/`yType`/`resultType`/`executionType` are `cudaDataType` values.
// NOTE(review): name suggests the type-generic conjugated dot product (`dotc`) — confirm against the cuBLAS docs.
#[must_use]
fn cublasDotcEx(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: ::core::ffi::c_int,
y: *const ::core::ffi::c_void,
yType: cuda_types::cublas::cudaDataType,
incy: ::core::ffi::c_int,
result: *mut ::core::ffi::c_void,
resultType: cuda_types::cublas::cudaDataType,
executionType: cuda_types::cublas::cudaDataType,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x`/`y` are `*const c_void`, `result` is `*mut c_void`,
// and `xType`/`yType`/`resultType`/`executionType` are `cudaDataType` values.
// NOTE(review): name suggests the 64-bit-index form of type-generic `dotc` — confirm against the cuBLAS docs.
#[must_use]
fn cublasDotcEx_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: i64,
y: *const ::core::ffi::c_void,
yType: cuda_types::cublas::cudaDataType,
incy: i64,
result: *mut ::core::ffi::c_void,
resultType: cuda_types::cublas::cudaDataType,
executionType: cuda_types::cublas::cudaDataType,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x`/`y` are `*const f32`, `result` is `*mut f32`.
// NOTE(review): name suggests BLAS `dot` (dot product), f32 variant — confirm against the cuBLAS docs.
#[must_use]
fn cublasSdot_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const f32,
incx: ::core::ffi::c_int,
y: *const f32,
incy: ::core::ffi::c_int,
result: *mut f32,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x`/`y` are `*const f32`, `result` is `*mut f32`.
// NOTE(review): name suggests BLAS `dot`, f32 variant with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasSdot_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const f32,
incx: i64,
y: *const f32,
incy: i64,
result: *mut f32,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x`/`y` are `*const f64`, `result` is `*mut f64`.
// NOTE(review): name suggests BLAS `dot` (dot product), f64 variant — confirm against the cuBLAS docs.
#[must_use]
fn cublasDdot_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const f64,
incx: ::core::ffi::c_int,
y: *const f64,
incy: ::core::ffi::c_int,
result: *mut f64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x`/`y` are `*const f64`, `result` is `*mut f64`.
// NOTE(review): name suggests BLAS `dot`, f64 variant with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasDdot_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const f64,
incx: i64,
y: *const f64,
incy: i64,
result: *mut f64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x`/`y` are `*const cuComplex`, `result` is `*mut cuComplex`.
// NOTE(review): name suggests BLAS `dotu` (unconjugated complex dot product) — confirm against the cuBLAS docs.
#[must_use]
fn cublasCdotu_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
y: *const cuda_types::cublas::cuComplex,
incy: ::core::ffi::c_int,
result: *mut cuda_types::cublas::cuComplex,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x`/`y` are `*const cuComplex`, `result` is `*mut cuComplex`.
// NOTE(review): name suggests BLAS `dotu` (unconjugated complex dot product), 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasCdotu_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
y: *const cuda_types::cublas::cuComplex,
incy: i64,
result: *mut cuda_types::cublas::cuComplex,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x`/`y` are `*const cuComplex`, `result` is `*mut cuComplex`.
// NOTE(review): name suggests BLAS `dotc` (conjugated complex dot product) — confirm against the cuBLAS docs.
#[must_use]
fn cublasCdotc_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
y: *const cuda_types::cublas::cuComplex,
incy: ::core::ffi::c_int,
result: *mut cuda_types::cublas::cuComplex,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x`/`y` are `*const cuComplex`, `result` is `*mut cuComplex`.
// NOTE(review): name suggests BLAS `dotc` (conjugated complex dot product), 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasCdotc_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
y: *const cuda_types::cublas::cuComplex,
incy: i64,
result: *mut cuda_types::cublas::cuComplex,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x`/`y` are `*const cuDoubleComplex`,
// `result` is `*mut cuDoubleComplex`.
// NOTE(review): name suggests BLAS `dotu` (unconjugated dot product), double complex — confirm against the cuBLAS docs.
#[must_use]
fn cublasZdotu_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
y: *const cuda_types::cublas::cuDoubleComplex,
incy: ::core::ffi::c_int,
result: *mut cuda_types::cublas::cuDoubleComplex,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x`/`y` are `*const cuDoubleComplex`,
// `result` is `*mut cuDoubleComplex`.
// NOTE(review): name suggests BLAS `dotu`, double complex with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasZdotu_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
y: *const cuda_types::cublas::cuDoubleComplex,
incy: i64,
result: *mut cuda_types::cublas::cuDoubleComplex,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x`/`y` are `*const cuDoubleComplex`,
// `result` is `*mut cuDoubleComplex`.
// NOTE(review): name suggests BLAS `dotc` (conjugated dot product), double complex — confirm against the cuBLAS docs.
#[must_use]
fn cublasZdotc_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
y: *const cuda_types::cublas::cuDoubleComplex,
incy: ::core::ffi::c_int,
result: *mut cuda_types::cublas::cuDoubleComplex,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x`/`y` are `*const cuDoubleComplex`,
// `result` is `*mut cuDoubleComplex`.
// NOTE(review): name suggests BLAS `dotc`, double complex with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasZdotc_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
y: *const cuda_types::cublas::cuDoubleComplex,
incy: i64,
result: *mut cuda_types::cublas::cuDoubleComplex,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `alpha` is `*const c_void`, `x` is `*mut c_void`,
// and `alphaType`/`xType`/`executionType` are `cudaDataType` values.
// NOTE(review): name suggests the type-generic BLAS `scal` (scale a vector by `alpha`) — confirm against the cuBLAS docs.
#[must_use]
fn cublasScalEx(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
alpha: *const ::core::ffi::c_void,
alphaType: cuda_types::cublas::cudaDataType,
x: *mut ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: ::core::ffi::c_int,
executionType: cuda_types::cublas::cudaDataType,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `alpha` is `*const c_void`, `x` is `*mut c_void`,
// and `alphaType`/`xType`/`executionType` are `cudaDataType` values.
// NOTE(review): name suggests the 64-bit-index form of type-generic `scal` — confirm against the cuBLAS docs.
#[must_use]
fn cublasScalEx_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
alpha: *const ::core::ffi::c_void,
alphaType: cuda_types::cublas::cudaDataType,
x: *mut ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: i64,
executionType: cuda_types::cublas::cudaDataType,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `alpha` is `*const f32`, `x` is `*mut f32`.
// NOTE(review): name suggests BLAS `scal` (scale vector by `alpha`), f32 variant — confirm against the cuBLAS docs.
#[must_use]
fn cublasSscal_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
alpha: *const f32,
x: *mut f32,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `alpha` is `*const f32`, `x` is `*mut f32`.
// NOTE(review): name suggests BLAS `scal`, f32 variant with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasSscal_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
alpha: *const f32,
x: *mut f32,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `alpha` is `*const f64`, `x` is `*mut f64`.
// NOTE(review): name suggests BLAS `scal` (scale vector by `alpha`), f64 variant — confirm against the cuBLAS docs.
#[must_use]
fn cublasDscal_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
alpha: *const f64,
x: *mut f64,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `alpha` is `*const f64`, `x` is `*mut f64`.
// NOTE(review): name suggests BLAS `scal`, f64 variant with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasDscal_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
alpha: *const f64,
x: *mut f64,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `alpha` is `*const cuComplex`, `x` is `*mut cuComplex`.
// NOTE(review): name suggests BLAS `scal` with complex `alpha` on a complex vector — confirm against the cuBLAS docs.
#[must_use]
fn cublasCscal_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuComplex,
x: *mut cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `alpha` is `*const cuComplex`, `x` is `*mut cuComplex`.
// NOTE(review): name suggests BLAS `scal` with complex `alpha`, 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasCscal_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
alpha: *const cuda_types::cublas::cuComplex,
x: *mut cuda_types::cublas::cuComplex,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `alpha` is a real `*const f32`, `x` is `*mut cuComplex`.
// NOTE(review): name suggests BLAS `scal` with a real `alpha` on a complex vector — confirm against the cuBLAS docs.
#[must_use]
fn cublasCsscal_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
alpha: *const f32,
x: *mut cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `alpha` is a real `*const f32`, `x` is `*mut cuComplex`.
// NOTE(review): name suggests BLAS `scal` with a real `alpha` on a complex vector, 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasCsscal_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
alpha: *const f32,
x: *mut cuda_types::cublas::cuComplex,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `alpha` is `*const cuDoubleComplex`, `x` is `*mut cuDoubleComplex`.
// NOTE(review): name suggests BLAS `scal` with double-complex `alpha` and vector — confirm against the cuBLAS docs.
#[must_use]
fn cublasZscal_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuDoubleComplex,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `alpha` is `*const cuDoubleComplex`, `x` is `*mut cuDoubleComplex`.
// NOTE(review): name suggests BLAS `scal` with double-complex `alpha`, 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasZscal_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
alpha: *const cuda_types::cublas::cuDoubleComplex,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `alpha` is a real `*const f64`, `x` is `*mut cuDoubleComplex`.
// NOTE(review): name suggests BLAS `scal` with a real `alpha` on a double-complex vector — confirm against the cuBLAS docs.
#[must_use]
fn cublasZdscal_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
alpha: *const f64,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `alpha` is a real `*const f64`, `x` is `*mut cuDoubleComplex`.
// NOTE(review): name suggests BLAS `scal` with a real `alpha` on a double-complex vector, 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasZdscal_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
alpha: *const f64,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `alpha`/`x` are `*const c_void`, `y` is `*mut c_void`,
// and `alphaType`/`xType`/`yType`/`executiontype` are `cudaDataType` values (note the lowercase `executiontype` spelling).
// NOTE(review): name suggests the type-generic BLAS `axpy` (y = alpha*x + y) — confirm against the cuBLAS docs.
#[must_use]
fn cublasAxpyEx(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
alpha: *const ::core::ffi::c_void,
alphaType: cuda_types::cublas::cudaDataType,
x: *const ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: ::core::ffi::c_int,
y: *mut ::core::ffi::c_void,
yType: cuda_types::cublas::cudaDataType,
incy: ::core::ffi::c_int,
executiontype: cuda_types::cublas::cudaDataType,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `alpha`/`x` are `*const c_void`, `y` is `*mut c_void`,
// and `alphaType`/`xType`/`yType`/`executiontype` are `cudaDataType` values.
// NOTE(review): name suggests the 64-bit-index form of type-generic `axpy` — confirm against the cuBLAS docs.
#[must_use]
fn cublasAxpyEx_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
alpha: *const ::core::ffi::c_void,
alphaType: cuda_types::cublas::cudaDataType,
x: *const ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: i64,
y: *mut ::core::ffi::c_void,
yType: cuda_types::cublas::cudaDataType,
incy: i64,
executiontype: cuda_types::cublas::cudaDataType,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `alpha`/`x` are `*const f32`, `y` is `*mut f32`.
// NOTE(review): name suggests BLAS `axpy` (y = alpha*x + y), f32 variant — confirm against the cuBLAS docs.
#[must_use]
fn cublasSaxpy_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
alpha: *const f32,
x: *const f32,
incx: ::core::ffi::c_int,
y: *mut f32,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `alpha`/`x` are `*const f32`, `y` is `*mut f32`.
// NOTE(review): name suggests BLAS `axpy`, f32 variant with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasSaxpy_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
alpha: *const f32,
x: *const f32,
incx: i64,
y: *mut f32,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `alpha`/`x` are `*const f64`, `y` is `*mut f64`.
// NOTE(review): name suggests BLAS `axpy` (y = alpha*x + y), f64 variant — confirm against the cuBLAS docs.
#[must_use]
fn cublasDaxpy_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
alpha: *const f64,
x: *const f64,
incx: ::core::ffi::c_int,
y: *mut f64,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `alpha`/`x` are `*const f64`, `y` is `*mut f64`.
// NOTE(review): name suggests BLAS `axpy`, f64 variant with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasDaxpy_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
alpha: *const f64,
x: *const f64,
incx: i64,
y: *mut f64,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `alpha`/`x` are `*const cuComplex`, `y` is `*mut cuComplex`.
// NOTE(review): name suggests BLAS `axpy` (y = alpha*x + y), single complex — confirm against the cuBLAS docs.
#[must_use]
fn cublasCaxpy_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuComplex,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
y: *mut cuda_types::cublas::cuComplex,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `alpha`/`x` are `*const cuComplex`, `y` is `*mut cuComplex`.
// NOTE(review): name suggests BLAS `axpy`, single complex with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasCaxpy_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
alpha: *const cuda_types::cublas::cuComplex,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
y: *mut cuda_types::cublas::cuComplex,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `alpha`/`x` are `*const cuDoubleComplex`,
// `y` is `*mut cuDoubleComplex`.
// NOTE(review): name suggests BLAS `axpy` (y = alpha*x + y), double complex — confirm against the cuBLAS docs.
#[must_use]
fn cublasZaxpy_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuDoubleComplex,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `alpha`/`x` are `*const cuDoubleComplex`,
// `y` is `*mut cuDoubleComplex`.
// NOTE(review): name suggests BLAS `axpy`, double complex with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasZaxpy_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
alpha: *const cuda_types::cublas::cuDoubleComplex,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x` is `*const c_void`, `y` is `*mut c_void`,
// and `xType`/`yType` are `cudaDataType` values (there is no execution-type parameter here).
// NOTE(review): name suggests the type-generic BLAS `copy` (x into y) — confirm against the cuBLAS docs.
#[must_use]
fn cublasCopyEx(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: ::core::ffi::c_int,
y: *mut ::core::ffi::c_void,
yType: cuda_types::cublas::cudaDataType,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x` is `*const c_void`, `y` is `*mut c_void`,
// and `xType`/`yType` are `cudaDataType` values.
// NOTE(review): name suggests the 64-bit-index form of type-generic `copy` — confirm against the cuBLAS docs.
#[must_use]
fn cublasCopyEx_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: i64,
y: *mut ::core::ffi::c_void,
yType: cuda_types::cublas::cudaDataType,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x` is `*const f32`, `y` is `*mut f32`.
// NOTE(review): name suggests BLAS `copy` (x into y), f32 variant — confirm against the cuBLAS docs.
#[must_use]
fn cublasScopy_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const f32,
incx: ::core::ffi::c_int,
y: *mut f32,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x` is `*const f32`, `y` is `*mut f32`.
// NOTE(review): name suggests BLAS `copy`, f32 variant with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasScopy_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const f32,
incx: i64,
y: *mut f32,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x` is `*const f64`, `y` is `*mut f64`.
// NOTE(review): name suggests BLAS `copy` (x into y), f64 variant — confirm against the cuBLAS docs.
#[must_use]
fn cublasDcopy_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const f64,
incx: ::core::ffi::c_int,
y: *mut f64,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x` is `*const f64`, `y` is `*mut f64`.
// NOTE(review): name suggests BLAS `copy`, f64 variant with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasDcopy_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const f64,
incx: i64,
y: *mut f64,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x` is `*const cuComplex`, `y` is `*mut cuComplex`.
// NOTE(review): name suggests BLAS `copy` (x into y), single complex — confirm against the cuBLAS docs.
#[must_use]
fn cublasCcopy_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
y: *mut cuda_types::cublas::cuComplex,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x` is `*const cuComplex`, `y` is `*mut cuComplex`.
// NOTE(review): name suggests BLAS `copy`, single complex with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasCcopy_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
y: *mut cuda_types::cublas::cuComplex,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x` is `*const cuDoubleComplex`, `y` is `*mut cuDoubleComplex`.
// NOTE(review): name suggests BLAS `copy` (x into y), double complex — confirm against the cuBLAS docs.
#[must_use]
fn cublasZcopy_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x` is `*const cuDoubleComplex`, `y` is `*mut cuDoubleComplex`.
// NOTE(review): name suggests BLAS `copy`, double complex with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasZcopy_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`; both `x` and `y` are `*mut f32`.
// NOTE(review): name suggests BLAS `swap` (exchange x and y), f32 variant — confirm against the cuBLAS docs.
#[must_use]
fn cublasSswap_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *mut f32,
incx: ::core::ffi::c_int,
y: *mut f32,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`; both `x` and `y` are `*mut f32`.
// NOTE(review): name suggests BLAS `swap`, f32 variant with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasSswap_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *mut f32,
incx: i64,
y: *mut f32,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`; both `x` and `y` are `*mut f64`.
// NOTE(review): name suggests BLAS `swap` (exchange x and y), f64 variant — confirm against the cuBLAS docs.
#[must_use]
fn cublasDswap_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *mut f64,
incx: ::core::ffi::c_int,
y: *mut f64,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`; both `x` and `y` are `*mut f64`.
// NOTE(review): name suggests BLAS `swap`, f64 variant with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasDswap_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *mut f64,
incx: i64,
y: *mut f64,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`; both `x` and `y` are `*mut cuComplex`.
// NOTE(review): name suggests BLAS `swap` (exchange x and y), single complex — confirm against the cuBLAS docs.
#[must_use]
fn cublasCswap_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *mut cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
y: *mut cuda_types::cublas::cuComplex,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`; both `x` and `y` are `*mut cuComplex`.
// NOTE(review): name suggests BLAS `swap`, single complex with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasCswap_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *mut cuda_types::cublas::cuComplex,
incx: i64,
y: *mut cuda_types::cublas::cuComplex,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`; both `x` and `y` are `*mut cuDoubleComplex`.
// NOTE(review): name suggests BLAS `swap` (exchange x and y), double complex — confirm against the cuBLAS docs.
#[must_use]
fn cublasZswap_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`; both `x` and `y` are `*mut cuDoubleComplex`.
// NOTE(review): name suggests BLAS `swap`, double complex with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasZswap_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: i64,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, both `x` and `y` are `*mut c_void`,
// and `xType`/`yType` are `cudaDataType` values.
// NOTE(review): name suggests the type-generic BLAS `swap` — confirm against the cuBLAS docs.
#[must_use]
fn cublasSwapEx(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *mut ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: ::core::ffi::c_int,
y: *mut ::core::ffi::c_void,
yType: cuda_types::cublas::cudaDataType,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, both `x` and `y` are `*mut c_void`,
// and `xType`/`yType` are `cudaDataType` values.
// NOTE(review): name suggests the 64-bit-index form of type-generic `swap` — confirm against the cuBLAS docs.
#[must_use]
fn cublasSwapEx_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *mut ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: i64,
y: *mut ::core::ffi::c_void,
yType: cuda_types::cublas::cudaDataType,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `x` is `*const f32`, and `result` is an integer `*mut c_int`.
// NOTE(review): name suggests BLAS `iamax` (index of the largest-magnitude element) — confirm indexing base in the cuBLAS docs.
#[must_use]
fn cublasIsamax_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const f32,
incx: ::core::ffi::c_int,
result: *mut ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `x` is `*const f32`, and `result` widens to `*mut i64`.
// NOTE(review): name suggests BLAS `iamax` (index of the largest-magnitude element), 64-bit sizes — confirm in the cuBLAS docs.
#[must_use]
fn cublasIsamax_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const f32,
incx: i64,
result: *mut i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `x` is `*const f64`, and `result` is an integer `*mut c_int`.
// NOTE(review): name suggests BLAS `iamax` (index of the largest-magnitude element), f64 input — confirm in the cuBLAS docs.
#[must_use]
fn cublasIdamax_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const f64,
incx: ::core::ffi::c_int,
result: *mut ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `x` is `*const f64`, and `result` widens to `*mut i64`.
// NOTE(review): name suggests BLAS `iamax`, f64 input with 64-bit sizes — confirm in the cuBLAS docs.
#[must_use]
fn cublasIdamax_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const f64,
incx: i64,
result: *mut i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `x` is `*const cuComplex`, and `result` is `*mut c_int`.
// NOTE(review): name suggests BLAS `iamax` on a single-complex vector — confirm the magnitude definition in the cuBLAS docs.
#[must_use]
fn cublasIcamax_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
result: *mut ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `x` is `*const cuComplex`, and `result` widens to `*mut i64`.
// NOTE(review): name suggests BLAS `iamax` on a single-complex vector, 64-bit sizes — confirm in the cuBLAS docs.
#[must_use]
fn cublasIcamax_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
result: *mut i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `x` is `*const cuDoubleComplex`, and `result` is `*mut c_int`.
// NOTE(review): name suggests BLAS `iamax` on a double-complex vector — confirm in the cuBLAS docs.
#[must_use]
fn cublasIzamax_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
result: *mut ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `x` is `*const cuDoubleComplex`, and `result` widens to `*mut i64`.
// NOTE(review): name suggests BLAS `iamax` on a double-complex vector, 64-bit sizes — confirm in the cuBLAS docs.
#[must_use]
fn cublasIzamax_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
result: *mut i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `x` is `*const c_void` tagged by `xType: cudaDataType`,
// and `result` is a typed `*mut c_int` (not `c_void`, unlike other `Ex` results in this file).
// NOTE(review): name suggests the type-generic BLAS `iamax` — confirm against the cuBLAS docs.
#[must_use]
fn cublasIamaxEx(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: ::core::ffi::c_int,
result: *mut ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `x` is `*const c_void` tagged by `xType: cudaDataType`,
// and `result` is `*mut i64`.
// NOTE(review): name suggests the 64-bit-index form of type-generic `iamax` — confirm against the cuBLAS docs.
#[must_use]
fn cublasIamaxEx_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: i64,
result: *mut i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `x` is `*const f32`, and `result` is an integer `*mut c_int`.
// NOTE(review): name suggests `iamin` (index of the smallest-magnitude element) — confirm indexing base in the cuBLAS docs.
#[must_use]
fn cublasIsamin_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const f32,
incx: ::core::ffi::c_int,
result: *mut ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `x` is `*const f32`, and `result` widens to `*mut i64`.
// NOTE(review): name suggests `iamin` (index of the smallest-magnitude element), 64-bit sizes — confirm in the cuBLAS docs.
#[must_use]
fn cublasIsamin_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const f32,
incx: i64,
result: *mut i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `x` is `*const f64`, and `result` is an integer `*mut c_int`.
// NOTE(review): name suggests `iamin` (index of the smallest-magnitude element), f64 input — confirm in the cuBLAS docs.
#[must_use]
fn cublasIdamin_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const f64,
incx: ::core::ffi::c_int,
result: *mut ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `x` is `*const f64`, and `result` widens to `*mut i64`.
// NOTE(review): name suggests `iamin`, f64 input with 64-bit sizes — confirm in the cuBLAS docs.
#[must_use]
fn cublasIdamin_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const f64,
incx: i64,
result: *mut i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `x` is `*const cuComplex`, and `result` is `*mut c_int`.
// NOTE(review): name suggests `iamin` on a single-complex vector — confirm the magnitude definition in the cuBLAS docs.
#[must_use]
fn cublasIcamin_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
result: *mut ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `x` is `*const cuComplex`, and `result` widens to `*mut i64`.
// NOTE(review): name suggests `iamin` on a single-complex vector, 64-bit sizes — confirm in the cuBLAS docs.
#[must_use]
fn cublasIcamin_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
result: *mut i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `x` is `*const cuDoubleComplex`, and `result` is `*mut c_int`.
// NOTE(review): name suggests `iamin` on a double-complex vector — confirm in the cuBLAS docs.
#[must_use]
fn cublasIzamin_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
result: *mut ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `x` is `*const cuDoubleComplex`, and `result` widens to `*mut i64`.
// NOTE(review): name suggests `iamin` on a double-complex vector, 64-bit sizes — confirm in the cuBLAS docs.
#[must_use]
fn cublasIzamin_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
result: *mut i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `x` is `*const c_void` tagged by `xType: cudaDataType`,
// and `result` is a typed `*mut c_int`.
// NOTE(review): name suggests the type-generic `iamin` — confirm against the cuBLAS docs.
#[must_use]
fn cublasIaminEx(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: ::core::ffi::c_int,
result: *mut ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `x` is `*const c_void` tagged by `xType: cudaDataType`,
// and `result` is `*mut i64`.
// NOTE(review): name suggests the 64-bit-index form of type-generic `iamin` — confirm against the cuBLAS docs.
#[must_use]
fn cublasIaminEx_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: i64,
result: *mut i64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `x` is `*const c_void`, `result` is `*mut c_void`,
// and `xType`/`resultType`/`executiontype` are `cudaDataType` values (note the lowercase `executiontype` spelling).
// NOTE(review): name suggests the type-generic BLAS `asum` (sum of absolute values) — confirm against the cuBLAS docs.
#[must_use]
fn cublasAsumEx(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: ::core::ffi::c_int,
result: *mut ::core::ffi::c_void,
resultType: cuda_types::cublas::cudaDataType,
executiontype: cuda_types::cublas::cudaDataType,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `x` is `*const c_void`, `result` is `*mut c_void`,
// and `xType`/`resultType`/`executiontype` are `cudaDataType` values.
// NOTE(review): name suggests the 64-bit-index form of type-generic `asum` — confirm against the cuBLAS docs.
#[must_use]
fn cublasAsumEx_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: i64,
result: *mut ::core::ffi::c_void,
resultType: cuda_types::cublas::cudaDataType,
executiontype: cuda_types::cublas::cudaDataType,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `x` is `*const f32`, `result` is `*mut f32`.
// NOTE(review): name suggests BLAS `asum` (sum of absolute values), f32 variant — confirm against the cuBLAS docs.
#[must_use]
fn cublasSasum_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const f32,
incx: ::core::ffi::c_int,
result: *mut f32,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `x` is `*const f32`, `result` is `*mut f32`.
// NOTE(review): name suggests BLAS `asum`, f32 variant with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasSasum_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const f32,
incx: i64,
result: *mut f32,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `x` is `*const f64`, `result` is `*mut f64`.
// NOTE(review): name suggests BLAS `asum` (sum of absolute values), f64 variant — confirm against the cuBLAS docs.
#[must_use]
fn cublasDasum_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const f64,
incx: ::core::ffi::c_int,
result: *mut f64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `x` is `*const f64`, `result` is `*mut f64`.
// NOTE(review): name suggests BLAS `asum`, f64 variant with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasDasum_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const f64,
incx: i64,
result: *mut f64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `x` is `*const cuComplex`, `result` is a real `*mut f32`.
// NOTE(review): name suggests BLAS `asum` on a single-complex vector — confirm the exact definition in the cuBLAS docs.
#[must_use]
fn cublasScasum_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
result: *mut f32,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `x` is `*const cuComplex`, `result` is a real `*mut f32`.
// NOTE(review): name suggests BLAS `asum` on a single-complex vector, 64-bit sizes — confirm in the cuBLAS docs.
#[must_use]
fn cublasScasum_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
result: *mut f32,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `c_int`, `x` is `*const cuDoubleComplex`, `result` is a real `*mut f64`.
// NOTE(review): name suggests BLAS `asum` on a double-complex vector — confirm the exact definition in the cuBLAS docs.
#[must_use]
fn cublasDzasum_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
result: *mut f64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx` are `i64`, `x` is `*const cuDoubleComplex`, `result` is a real `*mut f64`.
// NOTE(review): name suggests BLAS `asum` on a double-complex vector, 64-bit sizes — confirm in the cuBLAS docs.
#[must_use]
fn cublasDzasum_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
result: *mut f64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x`/`y` are `*mut f32`, `c`/`s` are `*const f32`.
// NOTE(review): name suggests BLAS `rot` (apply a plane rotation given by `c`, `s`) — confirm against the cuBLAS docs.
#[must_use]
fn cublasSrot_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *mut f32,
incx: ::core::ffi::c_int,
y: *mut f32,
incy: ::core::ffi::c_int,
c: *const f32,
s: *const f32,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x`/`y` are `*mut f32`, `c`/`s` are `*const f32`.
// NOTE(review): name suggests BLAS `rot` (plane rotation), f32 with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasSrot_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *mut f32,
incx: i64,
y: *mut f32,
incy: i64,
c: *const f32,
s: *const f32,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x`/`y` are `*mut f64`, `c`/`s` are `*const f64`.
// NOTE(review): name suggests BLAS `rot` (apply a plane rotation given by `c`, `s`), f64 — confirm against the cuBLAS docs.
#[must_use]
fn cublasDrot_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *mut f64,
incx: ::core::ffi::c_int,
y: *mut f64,
incy: ::core::ffi::c_int,
c: *const f64,
s: *const f64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x`/`y` are `*mut f64`, `c`/`s` are `*const f64`.
// NOTE(review): name suggests BLAS `rot` (plane rotation), f64 with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasDrot_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *mut f64,
incx: i64,
y: *mut f64,
incy: i64,
c: *const f64,
s: *const f64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x`/`y` are `*mut cuComplex`,
// `c` is a real `*const f32` while `s` is a complex `*const cuComplex`.
// NOTE(review): name suggests BLAS `rot` on complex vectors with a complex sine — confirm against the cuBLAS docs.
#[must_use]
fn cublasCrot_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *mut cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
y: *mut cuda_types::cublas::cuComplex,
incy: ::core::ffi::c_int,
c: *const f32,
s: *const cuda_types::cublas::cuComplex,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x`/`y` are `*mut cuComplex`,
// `c` is a real `*const f32` while `s` is a complex `*const cuComplex`.
// NOTE(review): name suggests BLAS `rot` on complex vectors with a complex sine, 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasCrot_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *mut cuda_types::cublas::cuComplex,
incx: i64,
y: *mut cuda_types::cublas::cuComplex,
incy: i64,
c: *const f32,
s: *const cuda_types::cublas::cuComplex,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x`/`y` are `*mut cuComplex`,
// and both `c` and `s` are real `*const f32`.
// NOTE(review): name suggests BLAS `rot` on complex vectors with real rotation parameters — confirm against the cuBLAS docs.
#[must_use]
fn cublasCsrot_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *mut cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
y: *mut cuda_types::cublas::cuComplex,
incy: ::core::ffi::c_int,
c: *const f32,
s: *const f32,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x`/`y` are `*mut cuComplex`,
// and both `c` and `s` are real `*const f32`.
// NOTE(review): name suggests BLAS `rot` on complex vectors with real parameters, 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasCsrot_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *mut cuda_types::cublas::cuComplex,
incx: i64,
y: *mut cuda_types::cublas::cuComplex,
incy: i64,
c: *const f32,
s: *const f32,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x`/`y` are `*mut cuDoubleComplex`,
// `c` is a real `*const f64` while `s` is a complex `*const cuDoubleComplex`.
// NOTE(review): name suggests BLAS `rot` on double-complex vectors with a complex sine — confirm against the cuBLAS docs.
#[must_use]
fn cublasZrot_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: ::core::ffi::c_int,
c: *const f64,
s: *const cuda_types::cublas::cuDoubleComplex,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x`/`y` are `*mut cuDoubleComplex`,
// `c` is a real `*const f64` while `s` is a complex `*const cuDoubleComplex`.
// NOTE(review): name suggests BLAS `rot` on double-complex vectors, complex sine, 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasZrot_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: i64,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: i64,
c: *const f64,
s: *const cuda_types::cublas::cuDoubleComplex,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x`/`y` are `*mut cuDoubleComplex`,
// and both `c` and `s` are real `*const f64`.
// NOTE(review): name suggests BLAS `rot` on double-complex vectors with real parameters — confirm against the cuBLAS docs.
#[must_use]
fn cublasZdrot_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: ::core::ffi::c_int,
c: *const f64,
s: *const f64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x`/`y` are `*mut cuDoubleComplex`,
// and both `c` and `s` are real `*const f64`.
// NOTE(review): name suggests BLAS `rot` on double-complex vectors, real parameters, 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasZdrot_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: i64,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: i64,
c: *const f64,
s: *const f64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x`/`y` are `*mut c_void`, `c`/`s` are `*const c_void`.
// `xType`, `yType`, `csType` (a single type argument following both `c` and `s`) and `executiontype` are `cudaDataType` values.
// NOTE(review): name suggests the type-generic BLAS `rot` (plane rotation) — confirm against the cuBLAS docs.
#[must_use]
fn cublasRotEx(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *mut ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: ::core::ffi::c_int,
y: *mut ::core::ffi::c_void,
yType: cuda_types::cublas::cudaDataType,
incy: ::core::ffi::c_int,
c: *const ::core::ffi::c_void,
s: *const ::core::ffi::c_void,
csType: cuda_types::cublas::cudaDataType,
executiontype: cuda_types::cublas::cudaDataType,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x`/`y` are `*mut c_void`, `c`/`s` are `*const c_void`.
// `xType`, `yType`, `csType` and `executiontype` are `cudaDataType` values.
// NOTE(review): name suggests the 64-bit-index form of type-generic `rot` — confirm against the cuBLAS docs.
#[must_use]
fn cublasRotEx_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *mut ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: i64,
y: *mut ::core::ffi::c_void,
yType: cuda_types::cublas::cudaDataType,
incy: i64,
c: *const ::core::ffi::c_void,
s: *const ::core::ffi::c_void,
csType: cuda_types::cublas::cudaDataType,
executiontype: cuda_types::cublas::cudaDataType,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; all of `a`, `b`, `c`, `s` are `*mut f32`, and there are no size or stride arguments.
// No `_64` counterpart is declared for this routine in the visible part of the file.
// NOTE(review): name suggests BLAS `rotg` (construct a Givens rotation) — confirm which pointers are in/out in the cuBLAS docs.
#[must_use]
fn cublasSrotg_v2(
handle: cuda_types::cublas::cublasHandle_t,
a: *mut f32,
b: *mut f32,
c: *mut f32,
s: *mut f32,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; all of `a`, `b`, `c`, `s` are `*mut f64`, and there are no size or stride arguments.
// NOTE(review): name suggests BLAS `rotg` (construct a Givens rotation), f64 — confirm in/out roles in the cuBLAS docs.
#[must_use]
fn cublasDrotg_v2(
handle: cuda_types::cublas::cublasHandle_t,
a: *mut f64,
b: *mut f64,
c: *mut f64,
s: *mut f64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `a`, `b` and `s` are `*mut cuComplex`, while `c` is a real `*mut f32`.
// NOTE(review): name suggests BLAS `rotg` (construct a Givens rotation), single complex — confirm in the cuBLAS docs.
#[must_use]
fn cublasCrotg_v2(
handle: cuda_types::cublas::cublasHandle_t,
a: *mut cuda_types::cublas::cuComplex,
b: *mut cuda_types::cublas::cuComplex,
c: *mut f32,
s: *mut cuda_types::cublas::cuComplex,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `a`, `b` and `s` are `*mut cuDoubleComplex`, while `c` is a real `*mut f64`.
// NOTE(review): name suggests BLAS `rotg` (construct a Givens rotation), double complex — confirm in the cuBLAS docs.
#[must_use]
fn cublasZrotg_v2(
handle: cuda_types::cublas::cublasHandle_t,
a: *mut cuda_types::cublas::cuDoubleComplex,
b: *mut cuda_types::cublas::cuDoubleComplex,
c: *mut f64,
s: *mut cuda_types::cublas::cuDoubleComplex,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `a`, `b`, `c`, `s` are all `*mut c_void`.
// `abType` follows `a`/`b`, `csType` follows `c`/`s`, and `executiontype` is a third `cudaDataType` value.
// NOTE(review): name suggests the type-generic BLAS `rotg` (construct a Givens rotation) — confirm against the cuBLAS docs.
#[must_use]
fn cublasRotgEx(
handle: cuda_types::cublas::cublasHandle_t,
a: *mut ::core::ffi::c_void,
b: *mut ::core::ffi::c_void,
abType: cuda_types::cublas::cudaDataType,
c: *mut ::core::ffi::c_void,
s: *mut ::core::ffi::c_void,
csType: cuda_types::cublas::cudaDataType,
executiontype: cuda_types::cublas::cudaDataType,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x`/`y` are `*mut f32`, `param` is `*const f32`.
// NOTE(review): name suggests BLAS `rotm` (apply a modified Givens rotation); `param` layout is not visible here — see the cuBLAS docs.
#[must_use]
fn cublasSrotm_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *mut f32,
incx: ::core::ffi::c_int,
y: *mut f32,
incy: ::core::ffi::c_int,
param: *const f32,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x`/`y` are `*mut f32`, `param` is `*const f32`.
// NOTE(review): name suggests BLAS `rotm` (modified Givens rotation), f32 with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasSrotm_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *mut f32,
incx: i64,
y: *mut f32,
incy: i64,
param: *const f32,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x`/`y` are `*mut f64`, `param` is `*const f64`.
// NOTE(review): name suggests BLAS `rotm` (apply a modified Givens rotation); `param` layout is not visible here — see the cuBLAS docs.
#[must_use]
fn cublasDrotm_v2(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *mut f64,
incx: ::core::ffi::c_int,
y: *mut f64,
incy: ::core::ffi::c_int,
param: *const f64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x`/`y` are `*mut f64`, `param` is `*const f64`.
// NOTE(review): name suggests BLAS `rotm` (modified Givens rotation), f64 with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasDrotm_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *mut f64,
incx: i64,
y: *mut f64,
incy: i64,
param: *const f64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `c_int`, `x`/`y` are `*mut c_void`, `param` is `*const c_void`.
// `xType`, `yType`, `paramType` and `executiontype` are `cudaDataType` values.
// NOTE(review): name suggests the type-generic BLAS `rotm` (modified Givens rotation) — confirm against the cuBLAS docs.
#[must_use]
fn cublasRotmEx(
handle: cuda_types::cublas::cublasHandle_t,
n: ::core::ffi::c_int,
x: *mut ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: ::core::ffi::c_int,
y: *mut ::core::ffi::c_void,
yType: cuda_types::cublas::cudaDataType,
incy: ::core::ffi::c_int,
param: *const ::core::ffi::c_void,
paramType: cuda_types::cublas::cudaDataType,
executiontype: cuda_types::cublas::cudaDataType,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `n`/`incx`/`incy` are `i64`, `x`/`y` are `*mut c_void`, `param` is `*const c_void`.
// `xType`, `yType`, `paramType` and `executiontype` are `cudaDataType` values.
// NOTE(review): name suggests the 64-bit-index form of type-generic `rotm` — confirm against the cuBLAS docs.
#[must_use]
fn cublasRotmEx_64(
handle: cuda_types::cublas::cublasHandle_t,
n: i64,
x: *mut ::core::ffi::c_void,
xType: cuda_types::cublas::cudaDataType,
incx: i64,
y: *mut ::core::ffi::c_void,
yType: cuda_types::cublas::cudaDataType,
incy: i64,
param: *const ::core::ffi::c_void,
paramType: cuda_types::cublas::cudaDataType,
executiontype: cuda_types::cublas::cudaDataType,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `d1`, `d2`, `x1` and `param` are `*mut f32`, while `y1` is the only `*const f32`.
// NOTE(review): name suggests BLAS `rotmg` (construct a modified Givens rotation) — confirm in/out roles in the cuBLAS docs.
#[must_use]
fn cublasSrotmg_v2(
handle: cuda_types::cublas::cublasHandle_t,
d1: *mut f32,
d2: *mut f32,
x1: *mut f32,
y1: *const f32,
param: *mut f32,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `d1`, `d2`, `x1` and `param` are `*mut f64`, while `y1` is the only `*const f64`.
// NOTE(review): name suggests BLAS `rotmg` (construct a modified Givens rotation), f64 — confirm in/out roles in the cuBLAS docs.
#[must_use]
fn cublasDrotmg_v2(
handle: cuda_types::cublas::cublasHandle_t,
d1: *mut f64,
d2: *mut f64,
x1: *mut f64,
y1: *const f64,
param: *mut f64,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `d1`, `d2`, `x1`, `param` are `*mut c_void` and `y1` is `*const c_void`.
// Each pointer is immediately followed by its own `cudaDataType` argument, plus a trailing `executiontype`.
// NOTE(review): name suggests the type-generic BLAS `rotmg` (construct a modified Givens rotation) — confirm against the cuBLAS docs.
#[must_use]
fn cublasRotmgEx(
handle: cuda_types::cublas::cublasHandle_t,
d1: *mut ::core::ffi::c_void,
d1Type: cuda_types::cublas::cudaDataType,
d2: *mut ::core::ffi::c_void,
d2Type: cuda_types::cublas::cudaDataType,
x1: *mut ::core::ffi::c_void,
x1Type: cuda_types::cublas::cudaDataType,
y1: *const ::core::ffi::c_void,
y1Type: cuda_types::cublas::cudaDataType,
param: *mut ::core::ffi::c_void,
paramType: cuda_types::cublas::cudaDataType,
executiontype: cuda_types::cublas::cudaDataType,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `m`/`n`/`lda`/`incx`/`incy` are `c_int` and `trans` is a `cublasOperation_t`.
// `alpha`, `A`, `x`, `beta` are `*const f32`; `y` is the only `*mut f32`.
// NOTE(review): name suggests BLAS `gemv` (y = alpha*op(A)*x + beta*y), f32 — confirm storage order in the cuBLAS docs.
#[must_use]
fn cublasSgemv_v2(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const f32,
A: *const f32,
lda: ::core::ffi::c_int,
x: *const f32,
incx: ::core::ffi::c_int,
beta: *const f32,
y: *mut f32,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Signature-only declaration; `m`/`n`/`lda`/`incx`/`incy` are `i64` and `trans` is a `cublasOperation_t`.
// `alpha`, `A`, `x`, `beta` are `*const f32`; `y` is the only `*mut f32`.
// NOTE(review): name suggests BLAS `gemv` (matrix-vector product), f32 with 64-bit sizes — confirm against the cuBLAS docs.
#[must_use]
fn cublasSgemv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: i64,
n: i64,
alpha: *const f32,
A: *const f32,
lda: i64,
x: *const f32,
incx: i64,
beta: *const f32,
y: *mut f32,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasDgemv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f64` data; integer arguments (`m`, `n`, `lda`, `incx`, `incy`) are `c_int`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably double-precision GEMV — confirm against cuBLAS docs.
#[must_use]
fn cublasDgemv_v2(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const f64,
A: *const f64,
lda: ::core::ffi::c_int,
x: *const f64,
incx: ::core::ffi::c_int,
beta: *const f64,
y: *mut f64,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasDgemv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f64` data; integer arguments (`m`, `n`, `lda`, `incx`, `incy`) are `i64`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably the 64-bit-index double-precision GEMV — confirm against cuBLAS docs.
#[must_use]
fn cublasDgemv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: i64,
n: i64,
alpha: *const f64,
A: *const f64,
lda: i64,
x: *const f64,
incx: i64,
beta: *const f64,
y: *mut f64,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasCgemv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; integer arguments (`m`, `n`, `lda`, `incx`, `incy`) are `c_int`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably single-precision complex GEMV — confirm against cuBLAS docs.
#[must_use]
fn cublasCgemv_v2(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuComplex,
A: *const cuda_types::cublas::cuComplex,
lda: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
beta: *const cuda_types::cublas::cuComplex,
y: *mut cuda_types::cublas::cuComplex,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasCgemv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; integer arguments (`m`, `n`, `lda`, `incx`, `incy`) are `i64`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably the 64-bit-index complex GEMV — confirm against cuBLAS docs.
#[must_use]
fn cublasCgemv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: i64,
n: i64,
alpha: *const cuda_types::cublas::cuComplex,
A: *const cuda_types::cublas::cuComplex,
lda: i64,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
beta: *const cuda_types::cublas::cuComplex,
y: *mut cuda_types::cublas::cuComplex,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZgemv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; integer arguments (`m`, `n`, `lda`, `incx`, `incy`) are `c_int`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably double-precision complex GEMV — confirm against cuBLAS docs.
#[must_use]
fn cublasZgemv_v2(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuDoubleComplex,
A: *const cuda_types::cublas::cuDoubleComplex,
lda: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
beta: *const cuda_types::cublas::cuDoubleComplex,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZgemv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; integer arguments (`m`, `n`, `lda`, `incx`, `incy`) are `i64`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably the 64-bit-index double-complex GEMV — confirm against cuBLAS docs.
#[must_use]
fn cublasZgemv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: i64,
n: i64,
alpha: *const cuda_types::cublas::cuDoubleComplex,
A: *const cuda_types::cublas::cuDoubleComplex,
lda: i64,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
beta: *const cuda_types::cublas::cuDoubleComplex,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasSgbmv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f32` data; integer arguments (`m`, `n`, `kl`, `ku`, `lda`, `incx`, `incy`) are `c_int`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably general banded matrix-vector multiply (GBMV) — confirm against cuBLAS docs.
#[must_use]
fn cublasSgbmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
kl: ::core::ffi::c_int,
ku: ::core::ffi::c_int,
alpha: *const f32,
A: *const f32,
lda: ::core::ffi::c_int,
x: *const f32,
incx: ::core::ffi::c_int,
beta: *const f32,
y: *mut f32,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasSgbmv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f32` data; integer arguments (`m`, `n`, `kl`, `ku`, `lda`, `incx`, `incy`) are `i64`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably the 64-bit-index GBMV entry point — confirm against cuBLAS docs.
#[must_use]
fn cublasSgbmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: i64,
n: i64,
kl: i64,
ku: i64,
alpha: *const f32,
A: *const f32,
lda: i64,
x: *const f32,
incx: i64,
beta: *const f32,
y: *mut f32,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasDgbmv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f64` data; integer arguments (`m`, `n`, `kl`, `ku`, `lda`, `incx`, `incy`) are `c_int`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably double-precision GBMV — confirm against cuBLAS docs.
#[must_use]
fn cublasDgbmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
kl: ::core::ffi::c_int,
ku: ::core::ffi::c_int,
alpha: *const f64,
A: *const f64,
lda: ::core::ffi::c_int,
x: *const f64,
incx: ::core::ffi::c_int,
beta: *const f64,
y: *mut f64,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasDgbmv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f64` data; integer arguments (`m`, `n`, `kl`, `ku`, `lda`, `incx`, `incy`) are `i64`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably the 64-bit-index double-precision GBMV — confirm against cuBLAS docs.
#[must_use]
fn cublasDgbmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: i64,
n: i64,
kl: i64,
ku: i64,
alpha: *const f64,
A: *const f64,
lda: i64,
x: *const f64,
incx: i64,
beta: *const f64,
y: *mut f64,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasCgbmv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; integer arguments (`m`, `n`, `kl`, `ku`, `lda`, `incx`, `incy`) are `c_int`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably single-precision complex GBMV — confirm against cuBLAS docs.
#[must_use]
fn cublasCgbmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
kl: ::core::ffi::c_int,
ku: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuComplex,
A: *const cuda_types::cublas::cuComplex,
lda: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
beta: *const cuda_types::cublas::cuComplex,
y: *mut cuda_types::cublas::cuComplex,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasCgbmv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; integer arguments (`m`, `n`, `kl`, `ku`, `lda`, `incx`, `incy`) are `i64`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably the 64-bit-index complex GBMV — confirm against cuBLAS docs.
#[must_use]
fn cublasCgbmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: i64,
n: i64,
kl: i64,
ku: i64,
alpha: *const cuda_types::cublas::cuComplex,
A: *const cuda_types::cublas::cuComplex,
lda: i64,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
beta: *const cuda_types::cublas::cuComplex,
y: *mut cuda_types::cublas::cuComplex,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZgbmv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; integer arguments (`m`, `n`, `kl`, `ku`, `lda`, `incx`, `incy`) are `c_int`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably double-precision complex GBMV — confirm against cuBLAS docs.
#[must_use]
fn cublasZgbmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
kl: ::core::ffi::c_int,
ku: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuDoubleComplex,
A: *const cuda_types::cublas::cuDoubleComplex,
lda: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
beta: *const cuda_types::cublas::cuDoubleComplex,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZgbmv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; integer arguments (`m`, `n`, `kl`, `ku`, `lda`, `incx`, `incy`) are `i64`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably the 64-bit-index double-complex GBMV — confirm against cuBLAS docs.
#[must_use]
fn cublasZgbmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: i64,
n: i64,
kl: i64,
ku: i64,
alpha: *const cuda_types::cublas::cuDoubleComplex,
A: *const cuda_types::cublas::cuDoubleComplex,
lda: i64,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
beta: *const cuda_types::cublas::cuDoubleComplex,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasStrmv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f32` data; `n`, `lda`, `incx` are `c_int`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably triangular matrix-vector multiply (TRMV), in place on `x` — confirm against cuBLAS docs.
#[must_use]
fn cublasStrmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
x: *mut f32,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasStrmv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f32` data; `n`, `lda`, `incx` are `i64`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably the 64-bit-index TRMV entry point — confirm against cuBLAS docs.
#[must_use]
fn cublasStrmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
A: *const f32,
lda: i64,
x: *mut f32,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasDtrmv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f64` data; `n`, `lda`, `incx` are `c_int`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably double-precision TRMV — confirm against cuBLAS docs.
#[must_use]
fn cublasDtrmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
x: *mut f64,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasDtrmv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f64` data; `n`, `lda`, `incx` are `i64`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably the 64-bit-index double-precision TRMV — confirm against cuBLAS docs.
#[must_use]
fn cublasDtrmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
A: *const f64,
lda: i64,
x: *mut f64,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasCtrmv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; `n`, `lda`, `incx` are `c_int`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably single-precision complex TRMV — confirm against cuBLAS docs.
#[must_use]
fn cublasCtrmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
A: *const cuda_types::cublas::cuComplex,
lda: ::core::ffi::c_int,
x: *mut cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasCtrmv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; `n`, `lda`, `incx` are `i64`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably the 64-bit-index complex TRMV — confirm against cuBLAS docs.
#[must_use]
fn cublasCtrmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
A: *const cuda_types::cublas::cuComplex,
lda: i64,
x: *mut cuda_types::cublas::cuComplex,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZtrmv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; `n`, `lda`, `incx` are `c_int`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably double-precision complex TRMV — confirm against cuBLAS docs.
#[must_use]
fn cublasZtrmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
A: *const cuda_types::cublas::cuDoubleComplex,
lda: ::core::ffi::c_int,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZtrmv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; `n`, `lda`, `incx` are `i64`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably the 64-bit-index double-complex TRMV — confirm against cuBLAS docs.
#[must_use]
fn cublasZtrmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
A: *const cuda_types::cublas::cuDoubleComplex,
lda: i64,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasStbmv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f32` data; `n`, `k`, `lda`, `incx` are `c_int`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably triangular banded matrix-vector multiply (TBMV) — confirm against cuBLAS docs.
#[must_use]
fn cublasStbmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
x: *mut f32,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasStbmv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f32` data; `n`, `k`, `lda`, `incx` are `i64`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably the 64-bit-index TBMV entry point — confirm against cuBLAS docs.
#[must_use]
fn cublasStbmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
k: i64,
A: *const f32,
lda: i64,
x: *mut f32,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasDtbmv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f64` data; `n`, `k`, `lda`, `incx` are `c_int`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably double-precision TBMV — confirm against cuBLAS docs.
#[must_use]
fn cublasDtbmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
x: *mut f64,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasDtbmv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f64` data; `n`, `k`, `lda`, `incx` are `i64`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably the 64-bit-index double-precision TBMV — confirm against cuBLAS docs.
#[must_use]
fn cublasDtbmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
k: i64,
A: *const f64,
lda: i64,
x: *mut f64,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasCtbmv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; `n`, `k`, `lda`, `incx` are `c_int`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably single-precision complex TBMV — confirm against cuBLAS docs.
#[must_use]
fn cublasCtbmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const cuda_types::cublas::cuComplex,
lda: ::core::ffi::c_int,
x: *mut cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasCtbmv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; `n`, `k`, `lda`, `incx` are `i64`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably the 64-bit-index complex TBMV — confirm against cuBLAS docs.
#[must_use]
fn cublasCtbmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
k: i64,
A: *const cuda_types::cublas::cuComplex,
lda: i64,
x: *mut cuda_types::cublas::cuComplex,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZtbmv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; `n`, `k`, `lda`, `incx` are `c_int`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably double-precision complex TBMV — confirm against cuBLAS docs.
#[must_use]
fn cublasZtbmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const cuda_types::cublas::cuDoubleComplex,
lda: ::core::ffi::c_int,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZtbmv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; `n`, `k`, `lda`, `incx` are `i64`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably the 64-bit-index double-complex TBMV — confirm against cuBLAS docs.
#[must_use]
fn cublasZtbmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
k: i64,
A: *const cuda_types::cublas::cuDoubleComplex,
lda: i64,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasStpmv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f32` data; `n`, `incx` are `c_int`. `AP` is `*const` and has no leading-dimension argument; `x` is `*mut`.
// NOTE(review): presumably triangular packed matrix-vector multiply (TPMV) — confirm against cuBLAS docs.
#[must_use]
fn cublasStpmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
AP: *const f32,
x: *mut f32,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasStpmv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f32` data; `n`, `incx` are `i64`. `AP` is `*const` and has no leading-dimension argument; `x` is `*mut`.
// NOTE(review): presumably the 64-bit-index TPMV entry point — confirm against cuBLAS docs.
#[must_use]
fn cublasStpmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
AP: *const f32,
x: *mut f32,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasDtpmv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f64` data; `n`, `incx` are `c_int`. `AP` is `*const` and has no leading-dimension argument; `x` is `*mut`.
// NOTE(review): presumably double-precision TPMV — confirm against cuBLAS docs.
#[must_use]
fn cublasDtpmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
AP: *const f64,
x: *mut f64,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasDtpmv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f64` data; `n`, `incx` are `i64`. `AP` is `*const` and has no leading-dimension argument; `x` is `*mut`.
// NOTE(review): presumably the 64-bit-index double-precision TPMV — confirm against cuBLAS docs.
#[must_use]
fn cublasDtpmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
AP: *const f64,
x: *mut f64,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasCtpmv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; `n`, `incx` are `c_int`. `AP` is `*const` and has no leading-dimension argument; `x` is `*mut`.
// NOTE(review): presumably single-precision complex TPMV — confirm against cuBLAS docs.
#[must_use]
fn cublasCtpmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
AP: *const cuda_types::cublas::cuComplex,
x: *mut cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasCtpmv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; `n`, `incx` are `i64`. `AP` is `*const` and has no leading-dimension argument; `x` is `*mut`.
// NOTE(review): presumably the 64-bit-index complex TPMV — confirm against cuBLAS docs.
#[must_use]
fn cublasCtpmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
AP: *const cuda_types::cublas::cuComplex,
x: *mut cuda_types::cublas::cuComplex,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZtpmv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; `n`, `incx` are `c_int`. `AP` is `*const` and has no leading-dimension argument; `x` is `*mut`.
// NOTE(review): presumably double-precision complex TPMV — confirm against cuBLAS docs.
#[must_use]
fn cublasZtpmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
AP: *const cuda_types::cublas::cuDoubleComplex,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZtpmv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; `n`, `incx` are `i64`. `AP` is `*const` and has no leading-dimension argument; `x` is `*mut`.
// NOTE(review): presumably the 64-bit-index double-complex TPMV — confirm against cuBLAS docs.
#[must_use]
fn cublasZtpmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
AP: *const cuda_types::cublas::cuDoubleComplex,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasStrsv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f32` data; `n`, `lda`, `incx` are `c_int`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably triangular solve with a single right-hand side (TRSV) — confirm against cuBLAS docs.
#[must_use]
fn cublasStrsv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
x: *mut f32,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasStrsv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f32` data; `n`, `lda`, `incx` are `i64`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably the 64-bit-index TRSV entry point — confirm against cuBLAS docs.
#[must_use]
fn cublasStrsv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
A: *const f32,
lda: i64,
x: *mut f32,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasDtrsv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f64` data; `n`, `lda`, `incx` are `c_int`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably double-precision TRSV — confirm against cuBLAS docs.
#[must_use]
fn cublasDtrsv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
x: *mut f64,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasDtrsv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f64` data; `n`, `lda`, `incx` are `i64`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably the 64-bit-index double-precision TRSV — confirm against cuBLAS docs.
#[must_use]
fn cublasDtrsv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
A: *const f64,
lda: i64,
x: *mut f64,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasCtrsv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; `n`, `lda`, `incx` are `c_int`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably single-precision complex TRSV — confirm against cuBLAS docs.
#[must_use]
fn cublasCtrsv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
A: *const cuda_types::cublas::cuComplex,
lda: ::core::ffi::c_int,
x: *mut cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasCtrsv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; `n`, `lda`, `incx` are `i64`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably the 64-bit-index complex TRSV — confirm against cuBLAS docs.
#[must_use]
fn cublasCtrsv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
A: *const cuda_types::cublas::cuComplex,
lda: i64,
x: *mut cuda_types::cublas::cuComplex,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZtrsv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; `n`, `lda`, `incx` are `c_int`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably double-precision complex TRSV — confirm against cuBLAS docs.
#[must_use]
fn cublasZtrsv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
A: *const cuda_types::cublas::cuDoubleComplex,
lda: ::core::ffi::c_int,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZtrsv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; `n`, `lda`, `incx` are `i64`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably the 64-bit-index double-complex TRSV — confirm against cuBLAS docs.
#[must_use]
fn cublasZtrsv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
A: *const cuda_types::cublas::cuDoubleComplex,
lda: i64,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasStpsv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f32` data; `n`, `incx` are `c_int`. `AP` is `*const` and has no leading-dimension argument; `x` is `*mut`.
// NOTE(review): presumably packed triangular solve (TPSV) — confirm against cuBLAS docs.
#[must_use]
fn cublasStpsv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
AP: *const f32,
x: *mut f32,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasStpsv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f32` data; `n`, `incx` are `i64`. `AP` is `*const` and has no leading-dimension argument; `x` is `*mut`.
// NOTE(review): presumably the 64-bit-index TPSV entry point — confirm against cuBLAS docs.
#[must_use]
fn cublasStpsv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
AP: *const f32,
x: *mut f32,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasDtpsv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f64` data; `n`, `incx` are `c_int`. `AP` is `*const` and has no leading-dimension argument; `x` is `*mut`.
// NOTE(review): presumably double-precision TPSV — confirm against cuBLAS docs.
#[must_use]
fn cublasDtpsv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
AP: *const f64,
x: *mut f64,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasDtpsv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f64` data; `n`, `incx` are `i64`. `AP` is `*const` and has no leading-dimension argument; `x` is `*mut`.
// NOTE(review): presumably the 64-bit-index double-precision TPSV — confirm against cuBLAS docs.
#[must_use]
fn cublasDtpsv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
AP: *const f64,
x: *mut f64,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasCtpsv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; `n`, `incx` are `c_int`. `AP` is `*const` and has no leading-dimension argument; `x` is `*mut`.
// NOTE(review): presumably single-precision complex TPSV — confirm against cuBLAS docs.
#[must_use]
fn cublasCtpsv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
AP: *const cuda_types::cublas::cuComplex,
x: *mut cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasCtpsv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; `n`, `incx` are `i64`. `AP` is `*const` and has no leading-dimension argument; `x` is `*mut`.
// NOTE(review): presumably the 64-bit-index complex TPSV — confirm against cuBLAS docs.
#[must_use]
fn cublasCtpsv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
AP: *const cuda_types::cublas::cuComplex,
x: *mut cuda_types::cublas::cuComplex,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZtpsv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; `n`, `incx` are `c_int`. `AP` is `*const` and has no leading-dimension argument; `x` is `*mut`.
// NOTE(review): presumably double-precision complex TPSV — confirm against cuBLAS docs.
#[must_use]
fn cublasZtpsv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
AP: *const cuda_types::cublas::cuDoubleComplex,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZtpsv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; `n`, `incx` are `i64`. `AP` is `*const` and has no leading-dimension argument; `x` is `*mut`.
// NOTE(review): presumably the 64-bit-index double-complex TPSV — confirm against cuBLAS docs.
#[must_use]
fn cublasZtpsv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
AP: *const cuda_types::cublas::cuDoubleComplex,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasStbsv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f32` data; `n`, `k`, `lda`, `incx` are `c_int`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably banded triangular solve (TBSV) — confirm against cuBLAS docs.
#[must_use]
fn cublasStbsv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
x: *mut f32,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasStbsv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f32` data; `n`, `k`, `lda`, `incx` are `i64`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably the 64-bit-index TBSV entry point — confirm against cuBLAS docs.
#[must_use]
fn cublasStbsv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
k: i64,
A: *const f32,
lda: i64,
x: *mut f32,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasDtbsv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f64` data; `n`, `k`, `lda`, `incx` are `c_int`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably double-precision TBSV — confirm against cuBLAS docs.
#[must_use]
fn cublasDtbsv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
x: *mut f64,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasDtbsv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f64` data; `n`, `k`, `lda`, `incx` are `i64`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably the 64-bit-index double-precision TBSV — confirm against cuBLAS docs.
#[must_use]
fn cublasDtbsv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
k: i64,
A: *const f64,
lda: i64,
x: *mut f64,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasCtbsv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; `n`, `k`, `lda`, `incx` are `c_int`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably single-precision complex TBSV — confirm against cuBLAS docs.
#[must_use]
fn cublasCtbsv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const cuda_types::cublas::cuComplex,
lda: ::core::ffi::c_int,
x: *mut cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasCtbsv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; `n`, `k`, `lda`, `incx` are `i64`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably the 64-bit-index complex TBSV — confirm against cuBLAS docs.
#[must_use]
fn cublasCtbsv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
k: i64,
A: *const cuda_types::cublas::cuComplex,
lda: i64,
x: *mut cuda_types::cublas::cuComplex,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZtbsv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; `n`, `k`, `lda`, `incx` are `c_int`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably double-precision complex TBSV — confirm against cuBLAS docs.
#[must_use]
fn cublasZtbsv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const cuda_types::cublas::cuDoubleComplex,
lda: ::core::ffi::c_int,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZtbsv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; `n`, `k`, `lda`, `incx` are `i64`. `A` is `*const`; `x` is `*mut` (the only writable pointer).
// NOTE(review): presumably the 64-bit-index double-complex TBSV — confirm against cuBLAS docs.
#[must_use]
fn cublasZtbsv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
trans: cuda_types::cublas::cublasOperation_t,
diag: cuda_types::cublas::cublasDiagType_t,
n: i64,
k: i64,
A: *const cuda_types::cublas::cuDoubleComplex,
lda: i64,
x: *mut cuda_types::cublas::cuDoubleComplex,
incx: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasSsymv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f32` data; `n`, `lda`, `incx`, `incy` are `c_int`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably symmetric matrix-vector multiply (SYMV) — confirm against cuBLAS docs.
#[must_use]
fn cublasSsymv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const f32,
A: *const f32,
lda: ::core::ffi::c_int,
x: *const f32,
incx: ::core::ffi::c_int,
beta: *const f32,
y: *mut f32,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasSsymv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f32` data; `n`, `lda`, `incx`, `incy` are `i64`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably the 64-bit-index SYMV entry point — confirm against cuBLAS docs.
#[must_use]
fn cublasSsymv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const f32,
A: *const f32,
lda: i64,
x: *const f32,
incx: i64,
beta: *const f32,
y: *mut f32,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasDsymv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f64` data; `n`, `lda`, `incx`, `incy` are `c_int`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably double-precision SYMV — confirm against cuBLAS docs.
#[must_use]
fn cublasDsymv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const f64,
A: *const f64,
lda: ::core::ffi::c_int,
x: *const f64,
incx: ::core::ffi::c_int,
beta: *const f64,
y: *mut f64,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasDsymv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f64` data; `n`, `lda`, `incx`, `incy` are `i64`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably the 64-bit-index double-precision SYMV — confirm against cuBLAS docs.
#[must_use]
fn cublasDsymv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const f64,
A: *const f64,
lda: i64,
x: *const f64,
incx: i64,
beta: *const f64,
y: *mut f64,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasCsymv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; `n`, `lda`, `incx`, `incy` are `c_int`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably single-precision complex SYMV — confirm against cuBLAS docs.
#[must_use]
fn cublasCsymv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuComplex,
A: *const cuda_types::cublas::cuComplex,
lda: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
beta: *const cuda_types::cublas::cuComplex,
y: *mut cuda_types::cublas::cuComplex,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasCsymv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; `n`, `lda`, `incx`, `incy` are `i64`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably the 64-bit-index complex SYMV — confirm against cuBLAS docs.
#[must_use]
fn cublasCsymv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const cuda_types::cublas::cuComplex,
A: *const cuda_types::cublas::cuComplex,
lda: i64,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
beta: *const cuda_types::cublas::cuComplex,
y: *mut cuda_types::cublas::cuComplex,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZsymv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; `n`, `lda`, `incx`, `incy` are `c_int`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably double-precision complex SYMV — confirm against cuBLAS docs.
#[must_use]
fn cublasZsymv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuDoubleComplex,
A: *const cuda_types::cublas::cuDoubleComplex,
lda: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
beta: *const cuda_types::cublas::cuDoubleComplex,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZsymv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; `n`, `lda`, `incx`, `incy` are `i64`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably the 64-bit-index double-complex SYMV — confirm against cuBLAS docs.
#[must_use]
fn cublasZsymv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const cuda_types::cublas::cuDoubleComplex,
A: *const cuda_types::cublas::cuDoubleComplex,
lda: i64,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
beta: *const cuda_types::cublas::cuDoubleComplex,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasChemv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; `n`, `lda`, `incx`, `incy` are `c_int`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably Hermitian matrix-vector multiply (HEMV) — confirm against cuBLAS docs.
#[must_use]
fn cublasChemv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuComplex,
A: *const cuda_types::cublas::cuComplex,
lda: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
beta: *const cuda_types::cublas::cuComplex,
y: *mut cuda_types::cublas::cuComplex,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasChemv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; `n`, `lda`, `incx`, `incy` are `i64`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably the 64-bit-index HEMV entry point — confirm against cuBLAS docs.
#[must_use]
fn cublasChemv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const cuda_types::cublas::cuComplex,
A: *const cuda_types::cublas::cuComplex,
lda: i64,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
beta: *const cuda_types::cublas::cuComplex,
y: *mut cuda_types::cublas::cuComplex,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZhemv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; `n`, `lda`, `incx`, `incy` are `c_int`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably double-precision complex HEMV — confirm against cuBLAS docs.
#[must_use]
fn cublasZhemv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuDoubleComplex,
A: *const cuda_types::cublas::cuDoubleComplex,
lda: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
beta: *const cuda_types::cublas::cuDoubleComplex,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZhemv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; `n`, `lda`, `incx`, `incy` are `i64`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably the 64-bit-index double-complex HEMV — confirm against cuBLAS docs.
#[must_use]
fn cublasZhemv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const cuda_types::cublas::cuDoubleComplex,
A: *const cuda_types::cublas::cuDoubleComplex,
lda: i64,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
beta: *const cuda_types::cublas::cuDoubleComplex,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasSsbmv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f32` data; `n`, `k`, `lda`, `incx`, `incy` are `c_int`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably symmetric banded matrix-vector multiply (SBMV) — confirm against cuBLAS docs.
#[must_use]
fn cublasSsbmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
alpha: *const f32,
A: *const f32,
lda: ::core::ffi::c_int,
x: *const f32,
incx: ::core::ffi::c_int,
beta: *const f32,
y: *mut f32,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasSsbmv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f32` data; `n`, `k`, `lda`, `incx`, `incy` are `i64`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably the 64-bit-index SBMV entry point — confirm against cuBLAS docs.
#[must_use]
fn cublasSsbmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
k: i64,
alpha: *const f32,
A: *const f32,
lda: i64,
x: *const f32,
incx: i64,
beta: *const f32,
y: *mut f32,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasDsbmv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f64` data; `n`, `k`, `lda`, `incx`, `incy` are `c_int`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably double-precision SBMV — confirm against cuBLAS docs.
#[must_use]
fn cublasDsbmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
alpha: *const f64,
A: *const f64,
lda: ::core::ffi::c_int,
x: *const f64,
incx: ::core::ffi::c_int,
beta: *const f64,
y: *mut f64,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasDsbmv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f64` data; `n`, `k`, `lda`, `incx`, `incy` are `i64`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably the 64-bit-index double-precision SBMV — confirm against cuBLAS docs.
#[must_use]
fn cublasDsbmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
k: i64,
alpha: *const f64,
A: *const f64,
lda: i64,
x: *const f64,
incx: i64,
beta: *const f64,
y: *mut f64,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasChbmv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; `n`, `k`, `lda`, `incx`, `incy` are `c_int`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably Hermitian banded matrix-vector multiply (HBMV) — confirm against cuBLAS docs.
#[must_use]
fn cublasChbmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuComplex,
A: *const cuda_types::cublas::cuComplex,
lda: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
beta: *const cuda_types::cublas::cuComplex,
y: *mut cuda_types::cublas::cuComplex,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasChbmv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuComplex` data; `n`, `k`, `lda`, `incx`, `incy` are `i64`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably the 64-bit-index HBMV entry point — confirm against cuBLAS docs.
#[must_use]
fn cublasChbmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
k: i64,
alpha: *const cuda_types::cublas::cuComplex,
A: *const cuda_types::cublas::cuComplex,
lda: i64,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
beta: *const cuda_types::cublas::cuComplex,
y: *mut cuda_types::cublas::cuComplex,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZhbmv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; `n`, `k`, `lda`, `incx`, `incy` are `c_int`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably double-precision complex HBMV — confirm against cuBLAS docs.
#[must_use]
fn cublasZhbmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuDoubleComplex,
A: *const cuda_types::cublas::cuDoubleComplex,
lda: ::core::ffi::c_int,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
beta: *const cuda_types::cublas::cuDoubleComplex,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasZhbmv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `cuDoubleComplex` data; `n`, `k`, `lda`, `incx`, `incy` are `i64`.
// `alpha`, `A`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably the 64-bit-index double-complex HBMV — confirm against cuBLAS docs.
#[must_use]
fn cublasZhbmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
k: i64,
alpha: *const cuda_types::cublas::cuDoubleComplex,
A: *const cuda_types::cublas::cuDoubleComplex,
lda: i64,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
beta: *const cuda_types::cublas::cuDoubleComplex,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasSspmv_v2`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f32` data; `n`, `incx`, `incy` are `c_int`. `AP` has no leading-dimension argument.
// `alpha`, `AP`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably symmetric packed matrix-vector multiply (SPMV) — confirm against cuBLAS docs.
#[must_use]
fn cublasSspmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const f32,
AP: *const f32,
x: *const f32,
incx: ::core::ffi::c_int,
beta: *const f32,
y: *mut f32,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
// Bodiless declaration of `cublasSspmv_v2_64`; marked `#[must_use]`, returns `cublasStatus_t`.
// `f32` data; `n`, `incx`, `incy` are `i64`. `AP` has no leading-dimension argument.
// `alpha`, `AP`, `x`, `beta` are `*const`; `y` is the only `*mut` pointer.
// NOTE(review): presumably the 64-bit-index SPMV entry point — confirm against cuBLAS docs.
#[must_use]
fn cublasSspmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const f32,
AP: *const f32,
x: *const f32,
incx: i64,
beta: *const f32,
y: *mut f32,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasDspmv_v2`; `n`, `incx` and `incy` are C `int`, all scalars and buffers are `f64`.
/// `AP` and `x` are read-only pointers; `y` is the only `*mut` pointer.
/// NOTE(review): presumably the double-precision symmetric packed matrix-vector product — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasDspmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const f64,
AP: *const f64,
x: *const f64,
incx: ::core::ffi::c_int,
beta: *const f64,
y: *mut f64,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasDspmv_v2_64`; same parameter list as `cublasDspmv_v2` but `n`, `incx` and `incy` are `i64`.
/// All scalars and buffers are `f64`; `y` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the symmetric packed matrix-vector product — confirm against the cuBLAS reference.
#[must_use]
fn cublasDspmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const f64,
AP: *const f64,
x: *const f64,
incx: i64,
beta: *const f64,
y: *mut f64,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasChpmv_v2`; `n`, `incx` and `incy` are C `int`, all scalars and buffers are `cuComplex`.
/// `AP` and `x` are read-only pointers; `y` is the only `*mut` pointer.
/// NOTE(review): presumably the Hermitian packed matrix-vector product — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasChpmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuComplex,
AP: *const cuda_types::cublas::cuComplex,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
beta: *const cuda_types::cublas::cuComplex,
y: *mut cuda_types::cublas::cuComplex,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasChpmv_v2_64`; same parameter list as `cublasChpmv_v2` but `n`, `incx` and `incy` are `i64`.
/// All scalars and buffers are `cuComplex`; `y` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the Hermitian packed matrix-vector product — confirm against the cuBLAS reference.
#[must_use]
fn cublasChpmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const cuda_types::cublas::cuComplex,
AP: *const cuda_types::cublas::cuComplex,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
beta: *const cuda_types::cublas::cuComplex,
y: *mut cuda_types::cublas::cuComplex,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasZhpmv_v2`; `n`, `incx` and `incy` are C `int`, all scalars and buffers are `cuDoubleComplex`.
/// `AP` and `x` are read-only pointers; `y` is the only `*mut` pointer.
/// NOTE(review): presumably the double-precision complex Hermitian packed matrix-vector product — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasZhpmv_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuDoubleComplex,
AP: *const cuda_types::cublas::cuDoubleComplex,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
beta: *const cuda_types::cublas::cuDoubleComplex,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasZhpmv_v2_64`; same parameter list as `cublasZhpmv_v2` but `n`, `incx` and `incy` are `i64`.
/// All scalars and buffers are `cuDoubleComplex`; `y` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the Hermitian packed matrix-vector product — confirm against the cuBLAS reference.
#[must_use]
fn cublasZhpmv_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const cuda_types::cublas::cuDoubleComplex,
AP: *const cuda_types::cublas::cuDoubleComplex,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
beta: *const cuda_types::cublas::cuDoubleComplex,
y: *mut cuda_types::cublas::cuDoubleComplex,
incy: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasSger_v2`; `m`, `n`, `incx`, `incy` and `lda` are C `int`, scalar and buffers are `f32`.
/// `x` and `y` are read-only pointers; `A` is the only `*mut` pointer. There is no `beta` argument.
/// NOTE(review): presumably the general rank-1 update of `A` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasSger_v2(
handle: cuda_types::cublas::cublasHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const f32,
x: *const f32,
incx: ::core::ffi::c_int,
y: *const f32,
incy: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasSger_v2_64`; same parameter list as `cublasSger_v2` but `m`, `n`, `incx`, `incy` and `lda` are `i64`.
/// Scalar and buffers are `f32`; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the general rank-1 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasSger_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
m: i64,
n: i64,
alpha: *const f32,
x: *const f32,
incx: i64,
y: *const f32,
incy: i64,
A: *mut f32,
lda: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasDger_v2`; `m`, `n`, `incx`, `incy` and `lda` are C `int`, scalar and buffers are `f64`.
/// `x` and `y` are read-only pointers; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the double-precision general rank-1 update of `A` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasDger_v2(
handle: cuda_types::cublas::cublasHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const f64,
x: *const f64,
incx: ::core::ffi::c_int,
y: *const f64,
incy: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasDger_v2_64`; same parameter list as `cublasDger_v2` but `m`, `n`, `incx`, `incy` and `lda` are `i64`.
/// Scalar and buffers are `f64`; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the general rank-1 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasDger_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
m: i64,
n: i64,
alpha: *const f64,
x: *const f64,
incx: i64,
y: *const f64,
incy: i64,
A: *mut f64,
lda: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasCgeru_v2`; `m`, `n`, `incx`, `incy` and `lda` are C `int`, scalar and buffers are `cuComplex`.
/// `x` and `y` are read-only pointers; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the unconjugated complex rank-1 update of `A` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasCgeru_v2(
handle: cuda_types::cublas::cublasHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuComplex,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
y: *const cuda_types::cublas::cuComplex,
incy: ::core::ffi::c_int,
A: *mut cuda_types::cublas::cuComplex,
lda: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasCgeru_v2_64`; same parameter list as `cublasCgeru_v2` but `m`, `n`, `incx`, `incy` and `lda` are `i64`.
/// Scalar and buffers are `cuComplex`; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the unconjugated rank-1 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasCgeru_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
m: i64,
n: i64,
alpha: *const cuda_types::cublas::cuComplex,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
y: *const cuda_types::cublas::cuComplex,
incy: i64,
A: *mut cuda_types::cublas::cuComplex,
lda: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasCgerc_v2`; `m`, `n`, `incx`, `incy` and `lda` are C `int`, scalar and buffers are `cuComplex`.
/// The parameter list is identical to `cublasCgeru_v2`; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the conjugated complex rank-1 update of `A` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasCgerc_v2(
handle: cuda_types::cublas::cublasHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuComplex,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
y: *const cuda_types::cublas::cuComplex,
incy: ::core::ffi::c_int,
A: *mut cuda_types::cublas::cuComplex,
lda: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasCgerc_v2_64`; same parameter list as `cublasCgerc_v2` but `m`, `n`, `incx`, `incy` and `lda` are `i64`.
/// Scalar and buffers are `cuComplex`; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the conjugated rank-1 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasCgerc_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
m: i64,
n: i64,
alpha: *const cuda_types::cublas::cuComplex,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
y: *const cuda_types::cublas::cuComplex,
incy: i64,
A: *mut cuda_types::cublas::cuComplex,
lda: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasZgeru_v2`; `m`, `n`, `incx`, `incy` and `lda` are C `int`, scalar and buffers are `cuDoubleComplex`.
/// `x` and `y` are read-only pointers; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the double-precision unconjugated complex rank-1 update — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasZgeru_v2(
handle: cuda_types::cublas::cublasHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuDoubleComplex,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
y: *const cuda_types::cublas::cuDoubleComplex,
incy: ::core::ffi::c_int,
A: *mut cuda_types::cublas::cuDoubleComplex,
lda: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasZgeru_v2_64`; same parameter list as `cublasZgeru_v2` but `m`, `n`, `incx`, `incy` and `lda` are `i64`.
/// Scalar and buffers are `cuDoubleComplex`; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the unconjugated rank-1 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasZgeru_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
m: i64,
n: i64,
alpha: *const cuda_types::cublas::cuDoubleComplex,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
y: *const cuda_types::cublas::cuDoubleComplex,
incy: i64,
A: *mut cuda_types::cublas::cuDoubleComplex,
lda: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasZgerc_v2`; `m`, `n`, `incx`, `incy` and `lda` are C `int`, scalar and buffers are `cuDoubleComplex`.
/// The parameter list is identical to `cublasZgeru_v2`; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the double-precision conjugated complex rank-1 update — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasZgerc_v2(
handle: cuda_types::cublas::cublasHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuDoubleComplex,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
y: *const cuda_types::cublas::cuDoubleComplex,
incy: ::core::ffi::c_int,
A: *mut cuda_types::cublas::cuDoubleComplex,
lda: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasZgerc_v2_64`; same parameter list as `cublasZgerc_v2` but `m`, `n`, `incx`, `incy` and `lda` are `i64`.
/// Scalar and buffers are `cuDoubleComplex`; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the conjugated rank-1 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasZgerc_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
m: i64,
n: i64,
alpha: *const cuda_types::cublas::cuDoubleComplex,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
y: *const cuda_types::cublas::cuDoubleComplex,
incy: i64,
A: *mut cuda_types::cublas::cuDoubleComplex,
lda: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasSsyr_v2`; `n`, `incx` and `lda` are C `int`, scalar and buffers are `f32`.
/// `x` is a read-only pointer; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the symmetric rank-1 update of `A` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasSsyr_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const f32,
x: *const f32,
incx: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasSsyr_v2_64`; same parameter list as `cublasSsyr_v2` but `n`, `incx` and `lda` are `i64`.
/// Scalar and buffers are `f32`; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the symmetric rank-1 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasSsyr_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const f32,
x: *const f32,
incx: i64,
A: *mut f32,
lda: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasDsyr_v2`; `n`, `incx` and `lda` are C `int`, scalar and buffers are `f64`.
/// `x` is a read-only pointer; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the double-precision symmetric rank-1 update of `A` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasDsyr_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const f64,
x: *const f64,
incx: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasDsyr_v2_64`; same parameter list as `cublasDsyr_v2` but `n`, `incx` and `lda` are `i64`.
/// Scalar and buffers are `f64`; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the symmetric rank-1 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasDsyr_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const f64,
x: *const f64,
incx: i64,
A: *mut f64,
lda: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasCsyr_v2`; `n`, `incx` and `lda` are C `int`, scalar and buffers are `cuComplex`.
/// `x` is a read-only pointer; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the complex symmetric (not Hermitian) rank-1 update of `A` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasCsyr_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuComplex,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
A: *mut cuda_types::cublas::cuComplex,
lda: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasCsyr_v2_64`; same parameter list as `cublasCsyr_v2` but `n`, `incx` and `lda` are `i64`.
/// Scalar and buffers are `cuComplex`; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the complex symmetric rank-1 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasCsyr_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const cuda_types::cublas::cuComplex,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
A: *mut cuda_types::cublas::cuComplex,
lda: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasZsyr_v2`; `n`, `incx` and `lda` are C `int`, scalar and buffers are `cuDoubleComplex`.
/// `x` is a read-only pointer; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the double-precision complex symmetric rank-1 update of `A` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasZsyr_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuDoubleComplex,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
A: *mut cuda_types::cublas::cuDoubleComplex,
lda: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasZsyr_v2_64`; same parameter list as `cublasZsyr_v2` but `n`, `incx` and `lda` are `i64`.
/// Scalar and buffers are `cuDoubleComplex`; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the complex symmetric rank-1 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasZsyr_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const cuda_types::cublas::cuDoubleComplex,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
A: *mut cuda_types::cublas::cuDoubleComplex,
lda: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasCher_v2`; `n`, `incx` and `lda` are C `int`.
/// Note the mixed types: `alpha` points to a real `f32`, while `x` and `A` are `cuComplex`. `A` is the only `*mut` pointer.
/// NOTE(review): presumably the Hermitian rank-1 update of `A` (hence the real scalar) — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasCher_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const f32,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
A: *mut cuda_types::cublas::cuComplex,
lda: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasCher_v2_64`; same parameter list as `cublasCher_v2` but `n`, `incx` and `lda` are `i64`.
/// `alpha` points to a real `f32`, while `x` and `A` are `cuComplex`. `A` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the Hermitian rank-1 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasCher_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const f32,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
A: *mut cuda_types::cublas::cuComplex,
lda: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasZher_v2`; `n`, `incx` and `lda` are C `int`.
/// Note the mixed types: `alpha` points to a real `f64`, while `x` and `A` are `cuDoubleComplex`. `A` is the only `*mut` pointer.
/// NOTE(review): presumably the double-precision Hermitian rank-1 update of `A` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasZher_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const f64,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
A: *mut cuda_types::cublas::cuDoubleComplex,
lda: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasZher_v2_64`; same parameter list as `cublasZher_v2` but `n`, `incx` and `lda` are `i64`.
/// `alpha` points to a real `f64`, while `x` and `A` are `cuDoubleComplex`. `A` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the Hermitian rank-1 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasZher_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const f64,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
A: *mut cuda_types::cublas::cuDoubleComplex,
lda: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasSspr_v2`; `n` and `incx` are C `int`, scalar and buffers are `f32`.
/// `x` is a read-only pointer; `AP` is the only `*mut` pointer. There is no leading-dimension argument.
/// NOTE(review): presumably the symmetric packed rank-1 update of `AP` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasSspr_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const f32,
x: *const f32,
incx: ::core::ffi::c_int,
AP: *mut f32,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasSspr_v2_64`; same parameter list as `cublasSspr_v2` but `n` and `incx` are `i64`.
/// Scalar and buffers are `f32`; `AP` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the symmetric packed rank-1 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasSspr_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const f32,
x: *const f32,
incx: i64,
AP: *mut f32,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasDspr_v2`; `n` and `incx` are C `int`, scalar and buffers are `f64`.
/// `x` is a read-only pointer; `AP` is the only `*mut` pointer.
/// NOTE(review): presumably the double-precision symmetric packed rank-1 update of `AP` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasDspr_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const f64,
x: *const f64,
incx: ::core::ffi::c_int,
AP: *mut f64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasDspr_v2_64`; same parameter list as `cublasDspr_v2` but `n` and `incx` are `i64`.
/// Scalar and buffers are `f64`; `AP` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the symmetric packed rank-1 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasDspr_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const f64,
x: *const f64,
incx: i64,
AP: *mut f64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasChpr_v2`; `n` and `incx` are C `int`.
/// Note the mixed types: `alpha` points to a real `f32`, while `x` and `AP` are `cuComplex`. `AP` is the only `*mut` pointer.
/// NOTE(review): presumably the Hermitian packed rank-1 update of `AP` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasChpr_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const f32,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
AP: *mut cuda_types::cublas::cuComplex,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasChpr_v2_64`; same parameter list as `cublasChpr_v2` but `n` and `incx` are `i64`.
/// `alpha` points to a real `f32`, while `x` and `AP` are `cuComplex`. `AP` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the Hermitian packed rank-1 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasChpr_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const f32,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
AP: *mut cuda_types::cublas::cuComplex,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasZhpr_v2`; `n` and `incx` are C `int`.
/// Note the mixed types: `alpha` points to a real `f64`, while `x` and `AP` are `cuDoubleComplex`. `AP` is the only `*mut` pointer.
/// NOTE(review): presumably the double-precision Hermitian packed rank-1 update of `AP` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasZhpr_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const f64,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
AP: *mut cuda_types::cublas::cuDoubleComplex,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasZhpr_v2_64`; same parameter list as `cublasZhpr_v2` but `n` and `incx` are `i64`.
/// `alpha` points to a real `f64`, while `x` and `AP` are `cuDoubleComplex`. `AP` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the Hermitian packed rank-1 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasZhpr_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const f64,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
AP: *mut cuda_types::cublas::cuDoubleComplex,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasSsyr2_v2`; `n`, `incx`, `incy` and `lda` are C `int`, scalar and buffers are `f32`.
/// `x` and `y` are read-only pointers; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the symmetric rank-2 update of `A` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasSsyr2_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const f32,
x: *const f32,
incx: ::core::ffi::c_int,
y: *const f32,
incy: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasSsyr2_v2_64`; same parameter list as `cublasSsyr2_v2` but `n`, `incx`, `incy` and `lda` are `i64`.
/// Scalar and buffers are `f32`; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the symmetric rank-2 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasSsyr2_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const f32,
x: *const f32,
incx: i64,
y: *const f32,
incy: i64,
A: *mut f32,
lda: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasDsyr2_v2`; `n`, `incx`, `incy` and `lda` are C `int`, scalar and buffers are `f64`.
/// `x` and `y` are read-only pointers; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the double-precision symmetric rank-2 update of `A` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasDsyr2_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const f64,
x: *const f64,
incx: ::core::ffi::c_int,
y: *const f64,
incy: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasDsyr2_v2_64`; same parameter list as `cublasDsyr2_v2` but `n`, `incx`, `incy` and `lda` are `i64`.
/// Scalar and buffers are `f64`; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the symmetric rank-2 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasDsyr2_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const f64,
x: *const f64,
incx: i64,
y: *const f64,
incy: i64,
A: *mut f64,
lda: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasCsyr2_v2`; `n`, `incx`, `incy` and `lda` are C `int`, scalar and buffers are `cuComplex`.
/// `x` and `y` are read-only pointers; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the complex symmetric rank-2 update of `A` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasCsyr2_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuComplex,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
y: *const cuda_types::cublas::cuComplex,
incy: ::core::ffi::c_int,
A: *mut cuda_types::cublas::cuComplex,
lda: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasCsyr2_v2_64`; same parameter list as `cublasCsyr2_v2` but `n`, `incx`, `incy` and `lda` are `i64`.
/// Scalar and buffers are `cuComplex`; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the complex symmetric rank-2 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasCsyr2_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const cuda_types::cublas::cuComplex,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
y: *const cuda_types::cublas::cuComplex,
incy: i64,
A: *mut cuda_types::cublas::cuComplex,
lda: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasZsyr2_v2`; `n`, `incx`, `incy` and `lda` are C `int`, scalar and buffers are `cuDoubleComplex`.
/// `x` and `y` are read-only pointers; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the double-precision complex symmetric rank-2 update of `A` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasZsyr2_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuDoubleComplex,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
y: *const cuda_types::cublas::cuDoubleComplex,
incy: ::core::ffi::c_int,
A: *mut cuda_types::cublas::cuDoubleComplex,
lda: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasZsyr2_v2_64`; same parameter list as `cublasZsyr2_v2` but `n`, `incx`, `incy` and `lda` are `i64`.
/// Scalar and buffers are `cuDoubleComplex`; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the complex symmetric rank-2 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasZsyr2_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const cuda_types::cublas::cuDoubleComplex,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
y: *const cuda_types::cublas::cuDoubleComplex,
incy: i64,
A: *mut cuda_types::cublas::cuDoubleComplex,
lda: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasCher2_v2`; `n`, `incx`, `incy` and `lda` are C `int`.
/// Unlike `cublasCher_v2`, `alpha` here is a `cuComplex` pointer, like `x`, `y` and `A`. `A` is the only `*mut` pointer.
/// NOTE(review): presumably the Hermitian rank-2 update of `A` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasCher2_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuComplex,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
y: *const cuda_types::cublas::cuComplex,
incy: ::core::ffi::c_int,
A: *mut cuda_types::cublas::cuComplex,
lda: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasCher2_v2_64`; same parameter list as `cublasCher2_v2` but `n`, `incx`, `incy` and `lda` are `i64`.
/// Scalar and buffers are `cuComplex`; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the Hermitian rank-2 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasCher2_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const cuda_types::cublas::cuComplex,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
y: *const cuda_types::cublas::cuComplex,
incy: i64,
A: *mut cuda_types::cublas::cuComplex,
lda: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasZher2_v2`; `n`, `incx`, `incy` and `lda` are C `int`.
/// Unlike `cublasZher_v2`, `alpha` here is a `cuDoubleComplex` pointer, like `x`, `y` and `A`. `A` is the only `*mut` pointer.
/// NOTE(review): presumably the double-precision Hermitian rank-2 update of `A` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasZher2_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuDoubleComplex,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
y: *const cuda_types::cublas::cuDoubleComplex,
incy: ::core::ffi::c_int,
A: *mut cuda_types::cublas::cuDoubleComplex,
lda: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasZher2_v2_64`; same parameter list as `cublasZher2_v2` but `n`, `incx`, `incy` and `lda` are `i64`.
/// Scalar and buffers are `cuDoubleComplex`; `A` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the Hermitian rank-2 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasZher2_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const cuda_types::cublas::cuDoubleComplex,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
y: *const cuda_types::cublas::cuDoubleComplex,
incy: i64,
A: *mut cuda_types::cublas::cuDoubleComplex,
lda: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasSspr2_v2`; `n`, `incx` and `incy` are C `int`, scalar and buffers are `f32`.
/// `x` and `y` are read-only pointers; `AP` is the only `*mut` pointer. There is no leading-dimension argument.
/// NOTE(review): presumably the symmetric packed rank-2 update of `AP` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasSspr2_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const f32,
x: *const f32,
incx: ::core::ffi::c_int,
y: *const f32,
incy: ::core::ffi::c_int,
AP: *mut f32,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasSspr2_v2_64`; same parameter list as `cublasSspr2_v2` but `n`, `incx` and `incy` are `i64`.
/// Scalar and buffers are `f32`; `AP` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the symmetric packed rank-2 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasSspr2_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const f32,
x: *const f32,
incx: i64,
y: *const f32,
incy: i64,
AP: *mut f32,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasDspr2_v2`; `n`, `incx` and `incy` are C `int`, scalar and buffers are `f64`.
/// `x` and `y` are read-only pointers; `AP` is the only `*mut` pointer.
/// NOTE(review): presumably the double-precision symmetric packed rank-2 update of `AP` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasDspr2_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const f64,
x: *const f64,
incx: ::core::ffi::c_int,
y: *const f64,
incy: ::core::ffi::c_int,
AP: *mut f64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasDspr2_v2_64`; same parameter list as `cublasDspr2_v2` but `n`, `incx` and `incy` are `i64`.
/// Scalar and buffers are `f64`; `AP` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the symmetric packed rank-2 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasDspr2_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const f64,
x: *const f64,
incx: i64,
y: *const f64,
incy: i64,
AP: *mut f64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasChpr2_v2`; `n`, `incx` and `incy` are C `int`, scalar and buffers are `cuComplex`.
/// `x` and `y` are read-only pointers; `AP` is the only `*mut` pointer.
/// NOTE(review): presumably the Hermitian packed rank-2 update of `AP` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasChpr2_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuComplex,
x: *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
y: *const cuda_types::cublas::cuComplex,
incy: ::core::ffi::c_int,
AP: *mut cuda_types::cublas::cuComplex,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasChpr2_v2_64`; same parameter list as `cublasChpr2_v2` but `n`, `incx` and `incy` are `i64`.
/// Scalar and buffers are `cuComplex`; `AP` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the Hermitian packed rank-2 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasChpr2_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const cuda_types::cublas::cuComplex,
x: *const cuda_types::cublas::cuComplex,
incx: i64,
y: *const cuda_types::cublas::cuComplex,
incy: i64,
AP: *mut cuda_types::cublas::cuComplex,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasZhpr2_v2`; `n`, `incx` and `incy` are C `int`, scalar and buffers are `cuDoubleComplex`.
/// `x` and `y` are read-only pointers; `AP` is the only `*mut` pointer.
/// NOTE(review): presumably the double-precision Hermitian packed rank-2 update of `AP` — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasZhpr2_v2(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuDoubleComplex,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
y: *const cuda_types::cublas::cuDoubleComplex,
incy: ::core::ffi::c_int,
AP: *mut cuda_types::cublas::cuDoubleComplex,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasZhpr2_v2_64`; same parameter list as `cublasZhpr2_v2` but `n`, `incx` and `incy` are `i64`.
/// Scalar and buffers are `cuDoubleComplex`; `AP` is the only `*mut` pointer.
/// NOTE(review): presumably the 64-bit-integer interface of the Hermitian packed rank-2 update — confirm against the cuBLAS reference.
#[must_use]
fn cublasZhpr2_v2_64(
handle: cuda_types::cublas::cublasHandle_t,
uplo: cuda_types::cublas::cublasFillMode_t,
n: i64,
alpha: *const cuda_types::cublas::cuDoubleComplex,
x: *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
y: *const cuda_types::cublas::cuDoubleComplex,
incy: i64,
AP: *mut cuda_types::cublas::cuDoubleComplex,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasSgemvBatched`; `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are C `int`, element type is `f32`.
/// `Aarray` and `xarray` are pointers to read-only pointers (`*const *const`); `yarray` is a pointer to mutable pointers (`*const *mut`).
/// NOTE(review): presumably a batch of general matrix-vector products, one per pointer triple — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasSgemvBatched(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const f32,
Aarray: *const *const f32,
lda: ::core::ffi::c_int,
xarray: *const *const f32,
incx: ::core::ffi::c_int,
beta: *const f32,
yarray: *const *mut f32,
incy: ::core::ffi::c_int,
batchCount: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasSgemvBatched_64`; same parameter list as `cublasSgemvBatched` but `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are `i64`.
/// Element type is `f32`; only the pointers stored in `yarray` are `*mut`.
/// NOTE(review): presumably the 64-bit-integer interface of the batched matrix-vector product — confirm against the cuBLAS reference.
#[must_use]
fn cublasSgemvBatched_64(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: i64,
n: i64,
alpha: *const f32,
Aarray: *const *const f32,
lda: i64,
xarray: *const *const f32,
incx: i64,
beta: *const f32,
yarray: *const *mut f32,
incy: i64,
batchCount: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasDgemvBatched`; `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are C `int`, element type is `f64`.
/// `Aarray` and `xarray` are pointers to read-only pointers; `yarray` is a pointer to mutable pointers.
/// NOTE(review): presumably a batch of double-precision general matrix-vector products — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasDgemvBatched(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const f64,
Aarray: *const *const f64,
lda: ::core::ffi::c_int,
xarray: *const *const f64,
incx: ::core::ffi::c_int,
beta: *const f64,
yarray: *const *mut f64,
incy: ::core::ffi::c_int,
batchCount: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasDgemvBatched_64`; same parameter list as `cublasDgemvBatched` but `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are `i64`.
/// Element type is `f64`; only the pointers stored in `yarray` are `*mut`.
/// NOTE(review): presumably the 64-bit-integer interface of the batched matrix-vector product — confirm against the cuBLAS reference.
#[must_use]
fn cublasDgemvBatched_64(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: i64,
n: i64,
alpha: *const f64,
Aarray: *const *const f64,
lda: i64,
xarray: *const *const f64,
incx: i64,
beta: *const f64,
yarray: *const *mut f64,
incy: i64,
batchCount: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasCgemvBatched`; `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are C `int`, element type is `cuComplex`.
/// `Aarray` and `xarray` are pointers to read-only pointers; `yarray` is a pointer to mutable pointers.
/// NOTE(review): presumably a batch of single-precision complex general matrix-vector products — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasCgemvBatched(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuComplex,
Aarray: *const *const cuda_types::cublas::cuComplex,
lda: ::core::ffi::c_int,
xarray: *const *const cuda_types::cublas::cuComplex,
incx: ::core::ffi::c_int,
beta: *const cuda_types::cublas::cuComplex,
yarray: *const *mut cuda_types::cublas::cuComplex,
incy: ::core::ffi::c_int,
batchCount: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasCgemvBatched_64`; same parameter list as `cublasCgemvBatched` but `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are `i64`.
/// Element type is `cuComplex`; only the pointers stored in `yarray` are `*mut`.
/// NOTE(review): presumably the 64-bit-integer interface of the batched matrix-vector product — confirm against the cuBLAS reference.
#[must_use]
fn cublasCgemvBatched_64(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: i64,
n: i64,
alpha: *const cuda_types::cublas::cuComplex,
Aarray: *const *const cuda_types::cublas::cuComplex,
lda: i64,
xarray: *const *const cuda_types::cublas::cuComplex,
incx: i64,
beta: *const cuda_types::cublas::cuComplex,
yarray: *const *mut cuda_types::cublas::cuComplex,
incy: i64,
batchCount: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasZgemvBatched`; `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are C `int`, element type is `cuDoubleComplex`.
/// `Aarray` and `xarray` are pointers to read-only pointers; `yarray` is a pointer to mutable pointers.
/// NOTE(review): presumably a batch of double-precision complex general matrix-vector products — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasZgemvBatched(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const cuda_types::cublas::cuDoubleComplex,
Aarray: *const *const cuda_types::cublas::cuDoubleComplex,
lda: ::core::ffi::c_int,
xarray: *const *const cuda_types::cublas::cuDoubleComplex,
incx: ::core::ffi::c_int,
beta: *const cuda_types::cublas::cuDoubleComplex,
yarray: *const *mut cuda_types::cublas::cuDoubleComplex,
incy: ::core::ffi::c_int,
batchCount: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasZgemvBatched_64`; same parameter list as `cublasZgemvBatched` but `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are `i64`.
/// Element type is `cuDoubleComplex`; only the pointers stored in `yarray` are `*mut`.
/// NOTE(review): presumably the 64-bit-integer interface of the batched matrix-vector product — confirm against the cuBLAS reference.
#[must_use]
fn cublasZgemvBatched_64(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: i64,
n: i64,
alpha: *const cuda_types::cublas::cuDoubleComplex,
Aarray: *const *const cuda_types::cublas::cuDoubleComplex,
lda: i64,
xarray: *const *const cuda_types::cublas::cuDoubleComplex,
incx: i64,
beta: *const cuda_types::cublas::cuDoubleComplex,
yarray: *const *mut cuda_types::cublas::cuDoubleComplex,
incy: i64,
batchCount: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasHSHgemvBatched`; `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are C `int`.
/// Mixed types: `alpha`/`beta` point to `f32`, while `Aarray`, `xarray` and `yarray` hold `__half` pointers (only `yarray`'s are `*mut`).
/// NOTE(review): presumably a batched matrix-vector product with half-precision input/output and `f32` scalars — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasHSHgemvBatched(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const f32,
Aarray: *const *const cuda_types::cublas::__half,
lda: ::core::ffi::c_int,
xarray: *const *const cuda_types::cublas::__half,
incx: ::core::ffi::c_int,
beta: *const f32,
yarray: *const *mut cuda_types::cublas::__half,
incy: ::core::ffi::c_int,
batchCount: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasHSHgemvBatched_64`; same parameter list as `cublasHSHgemvBatched` but all integer arguments are `i64`.
/// `alpha`/`beta` point to `f32`; `Aarray`, `xarray` and `yarray` hold `__half` pointers (only `yarray`'s are `*mut`).
/// NOTE(review): presumably the 64-bit-integer interface of the half-precision batched matrix-vector product — confirm against the cuBLAS reference.
#[must_use]
fn cublasHSHgemvBatched_64(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: i64,
n: i64,
alpha: *const f32,
Aarray: *const *const cuda_types::cublas::__half,
lda: i64,
xarray: *const *const cuda_types::cublas::__half,
incx: i64,
beta: *const f32,
yarray: *const *mut cuda_types::cublas::__half,
incy: i64,
batchCount: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasHSSgemvBatched`; `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are C `int`.
/// Mixed types: `Aarray` and `xarray` hold `__half` pointers, while `alpha`, `beta` and the `*mut` pointers in `yarray` are `f32`.
/// NOTE(review): presumably a batched matrix-vector product with half-precision input and `f32` output — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasHSSgemvBatched(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const f32,
Aarray: *const *const cuda_types::cublas::__half,
lda: ::core::ffi::c_int,
xarray: *const *const cuda_types::cublas::__half,
incx: ::core::ffi::c_int,
beta: *const f32,
yarray: *const *mut f32,
incy: ::core::ffi::c_int,
batchCount: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasHSSgemvBatched_64`; same parameter list as `cublasHSSgemvBatched` but all integer arguments are `i64`.
/// `Aarray` and `xarray` hold `__half` pointers; `alpha`, `beta` and the `*mut` pointers in `yarray` are `f32`.
/// NOTE(review): presumably the 64-bit-integer interface of the half-in/`f32`-out batched matrix-vector product — confirm against the cuBLAS reference.
#[must_use]
fn cublasHSSgemvBatched_64(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: i64,
n: i64,
alpha: *const f32,
Aarray: *const *const cuda_types::cublas::__half,
lda: i64,
xarray: *const *const cuda_types::cublas::__half,
incx: i64,
beta: *const f32,
yarray: *const *mut f32,
incy: i64,
batchCount: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasTSTgemvBatched`; `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are C `int`.
/// Mixed types: `alpha`/`beta` point to `f32`, while `Aarray`, `xarray` and `yarray` hold `__nv_bfloat16` pointers (only `yarray`'s are `*mut`).
/// NOTE(review): presumably a batched matrix-vector product with bfloat16 input/output and `f32` scalars — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasTSTgemvBatched(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const f32,
Aarray: *const *const cuda_types::cublas::__nv_bfloat16,
lda: ::core::ffi::c_int,
xarray: *const *const cuda_types::cublas::__nv_bfloat16,
incx: ::core::ffi::c_int,
beta: *const f32,
yarray: *const *mut cuda_types::cublas::__nv_bfloat16,
incy: ::core::ffi::c_int,
batchCount: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasTSTgemvBatched_64`; same parameter list as `cublasTSTgemvBatched` but all integer arguments are `i64`.
/// `alpha`/`beta` point to `f32`; `Aarray`, `xarray` and `yarray` hold `__nv_bfloat16` pointers (only `yarray`'s are `*mut`).
/// NOTE(review): presumably the 64-bit-integer interface of the bfloat16 batched matrix-vector product — confirm against the cuBLAS reference.
#[must_use]
fn cublasTSTgemvBatched_64(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: i64,
n: i64,
alpha: *const f32,
Aarray: *const *const cuda_types::cublas::__nv_bfloat16,
lda: i64,
xarray: *const *const cuda_types::cublas::__nv_bfloat16,
incx: i64,
beta: *const f32,
yarray: *const *mut cuda_types::cublas::__nv_bfloat16,
incy: i64,
batchCount: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasTSSgemvBatched`; `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are C `int`.
/// Mixed types: `Aarray` and `xarray` hold `__nv_bfloat16` pointers, while `alpha`, `beta` and the `*mut` pointers in `yarray` are `f32`.
/// NOTE(review): presumably a batched matrix-vector product with bfloat16 input and `f32` output — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasTSSgemvBatched(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const f32,
Aarray: *const *const cuda_types::cublas::__nv_bfloat16,
lda: ::core::ffi::c_int,
xarray: *const *const cuda_types::cublas::__nv_bfloat16,
incx: ::core::ffi::c_int,
beta: *const f32,
yarray: *const *mut f32,
incy: ::core::ffi::c_int,
batchCount: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasTSSgemvBatched_64`; same parameter list as `cublasTSSgemvBatched` but all integer arguments are `i64`.
/// `Aarray` and `xarray` hold `__nv_bfloat16` pointers; `alpha`, `beta` and the `*mut` pointers in `yarray` are `f32`.
/// NOTE(review): presumably the 64-bit-integer interface of the bfloat16-in/`f32`-out batched matrix-vector product — confirm against the cuBLAS reference.
#[must_use]
fn cublasTSSgemvBatched_64(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: i64,
n: i64,
alpha: *const f32,
Aarray: *const *const cuda_types::cublas::__nv_bfloat16,
lda: i64,
xarray: *const *const cuda_types::cublas::__nv_bfloat16,
incx: i64,
beta: *const f32,
yarray: *const *mut f32,
incy: i64,
batchCount: i64,
) -> cuda_types::cublas::cublasStatus_t;
/// Declaration of `cublasSgemvStridedBatched`; `m`, `n`, `lda`, `incx`, `incy` and `batchCount` are C `int`.
/// The three stride arguments (`strideA`, `stridex`, `stridey`) are C `long long` even in this non-`_64` variant.
/// `A` and `x` are single read-only `f32` pointers (not pointer arrays); `y` is the only `*mut` pointer.
/// NOTE(review): presumably a batched matrix-vector product where consecutive problems are a fixed stride apart — inferred from the name, confirm against the cuBLAS reference.
#[must_use]
fn cublasSgemvStridedBatched(
handle: cuda_types::cublas::cublasHandle_t,
trans: cuda_types::cublas::cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const f32,
A: *const f32,
lda: ::core::ffi::c_int,
strideA: ::core::ffi::c_longlong,
x: *const f32,
incx: ::core::ffi::c_int,
stridex: ::core::ffi::c_longlong,
beta: *const f32,
y: *mut f32,
incy: ::core::ffi::c_int,
stridey: ::core::ffi::c_longlong,
batchCount: ::core::ffi::c_int,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasSgemvStridedBatched_64(
handle: cuda_types::cubl
gitextract_ehz427hu/
├── .cargo/
│ └── config.toml
├── .devcontainer/
│ ├── Dockerfile
│ └── devcontainer.json
├── .git-blame-ignore-revs
├── .gitattributes
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── config.yml
│ │ └── zluda_dump.yml
│ └── workflows/
│ ├── move_tests.sh
│ ├── nightly_tests.yml
│ ├── pr_master.yml
│ ├── push_master.yml
│ ├── rocm_setup_build.sh
│ ├── rocm_setup_run.sh
│ └── trigger_nightly_tests.yml
├── .gitignore
├── .gitmodules
├── .rustfmt.toml
├── Cargo.toml
├── LICENSE-APACHE
├── LICENSE-MIT
├── README.md
├── compiler/
│ ├── Cargo.toml
│ └── src/
│ ├── error.rs
│ └── main.rs
├── cuda_check/
│ ├── Cargo.toml
│ └── src/
│ ├── main.rs
│ └── win.rs
├── cuda_macros/
│ ├── .rustfmt.toml
│ ├── Cargo.toml
│ ├── build/
│ │ └── wrapper.h
│ └── src/
│ ├── cublas.rs
│ ├── cublaslt.rs
│ ├── cublaslt_internal.rs
│ ├── cuda.rs
│ ├── cudnn8.rs
│ ├── cudnn9.rs
│ ├── cufft.rs
│ ├── cusparse.rs
│ ├── lib.rs
│ └── nvml.rs
├── cuda_types/
│ ├── .rustfmt.toml
│ ├── Cargo.toml
│ └── src/
│ ├── cublas.rs
│ ├── cublaslt.rs
│ ├── cuda.rs
│ ├── cudnn.rs
│ ├── cudnn8.rs
│ ├── cudnn9.rs
│ ├── cufft.rs
│ ├── cusparse.rs
│ ├── dark_api.rs
│ ├── lib.rs
│ └── nvml.rs
├── dark_api/
│ ├── Cargo.toml
│ └── src/
│ ├── fatbin.rs
│ └── lib.rs
├── detours-sys/
│ ├── Cargo.toml
│ ├── LICENSE-APACHE
│ ├── LICENSE-MIT
│ ├── README.md
│ ├── build/
│ │ └── wrapper.h
│ ├── build.rs
│ └── src/
│ ├── bundled_bindings.rs
│ └── lib.rs
├── docs/
│ ├── .gitignore
│ ├── .readthedocs.yaml
│ ├── book.toml
│ └── src/
│ ├── SUMMARY.md
│ ├── building.md
│ ├── faq.md
│ ├── hip_sdk.md
│ ├── llama_cpp.md
│ ├── precompiling.md
│ ├── quick_start.md
│ └── troubleshooting.md
├── ext/
│ ├── detours/
│ │ ├── .github/
│ │ │ ├── ISSUE_TEMPLATE/
│ │ │ │ ├── bug-report.md
│ │ │ │ └── question.md
│ │ │ ├── PULL_REQUEST_TEMPLATE/
│ │ │ │ └── pull_request_template.md
│ │ │ ├── codeql/
│ │ │ │ └── codeql-config.yml
│ │ │ └── workflows/
│ │ │ └── main.yml
│ │ ├── .gitignore
│ │ ├── CREDITS.TXT
│ │ ├── LICENSE.md
│ │ ├── Makefile
│ │ ├── README.md
│ │ ├── samples/
│ │ │ ├── Makefile
│ │ │ ├── README.TXT
│ │ │ ├── comeasy/
│ │ │ │ ├── Makefile
│ │ │ │ ├── comeasy.cpp
│ │ │ │ ├── wrotei.cpp
│ │ │ │ └── wrotei.rc
│ │ │ ├── commem/
│ │ │ │ ├── Makefile
│ │ │ │ └── commem.cpp
│ │ │ ├── common.mak
│ │ │ ├── cping/
│ │ │ │ ├── Makefile
│ │ │ │ ├── ReadMe.Txt
│ │ │ │ ├── cping.cpp
│ │ │ │ └── iping.idl
│ │ │ ├── disas/
│ │ │ │ ├── Makefile
│ │ │ │ ├── arm.asm
│ │ │ │ ├── disas.cpp
│ │ │ │ ├── ia64.asm
│ │ │ │ ├── unk.cpp
│ │ │ │ ├── x64.asm
│ │ │ │ └── x86.cpp
│ │ │ ├── dtest/
│ │ │ │ ├── Makefile
│ │ │ │ ├── NORMAL_IA64.TXT
│ │ │ │ ├── NORMAL_X64.TXT
│ │ │ │ ├── NORMAL_X86.TXT
│ │ │ │ ├── dtarge.cpp
│ │ │ │ ├── dtarge.h
│ │ │ │ ├── dtarge.rc
│ │ │ │ └── dtest.cpp
│ │ │ ├── dumpe/
│ │ │ │ ├── Makefile
│ │ │ │ └── dumpe.cpp
│ │ │ ├── dumpi/
│ │ │ │ ├── Makefile
│ │ │ │ └── dumpi.cpp
│ │ │ ├── dynamic_alloc/
│ │ │ │ ├── Makefile
│ │ │ │ ├── main.cpp
│ │ │ │ ├── x64.asm
│ │ │ │ └── x86.asm
│ │ │ ├── echo/
│ │ │ │ ├── Makefile
│ │ │ │ ├── echofx.cpp
│ │ │ │ ├── echofx.rc
│ │ │ │ ├── echonul.cpp
│ │ │ │ └── main.cpp
│ │ │ ├── einst/
│ │ │ │ ├── Makefile
│ │ │ │ ├── edll1x.cpp
│ │ │ │ ├── edll2x.cpp
│ │ │ │ ├── edll3x.cpp
│ │ │ │ └── einst.cpp
│ │ │ ├── excep/
│ │ │ │ ├── Makefile
│ │ │ │ ├── excep.cpp
│ │ │ │ ├── firstexc.cpp
│ │ │ │ └── firstexc.h
│ │ │ ├── findfunc/
│ │ │ │ ├── Makefile
│ │ │ │ ├── extend.cpp
│ │ │ │ ├── extend.rc
│ │ │ │ ├── findfunc.cpp
│ │ │ │ ├── symtest.cpp
│ │ │ │ ├── target.cpp
│ │ │ │ ├── target.h
│ │ │ │ └── target.rc
│ │ │ ├── impmunge/
│ │ │ │ ├── Makefile
│ │ │ │ └── impmunge.cpp
│ │ │ ├── member/
│ │ │ │ ├── Makefile
│ │ │ │ └── member.cpp
│ │ │ ├── opengl/
│ │ │ │ ├── Makefile
│ │ │ │ ├── ogldet.cpp
│ │ │ │ ├── ogldet.rc
│ │ │ │ └── testogl.cpp
│ │ │ ├── region/
│ │ │ │ ├── Makefile
│ │ │ │ └── region.cpp
│ │ │ ├── setdll/
│ │ │ │ ├── Makefile
│ │ │ │ └── setdll.cpp
│ │ │ ├── simple/
│ │ │ │ ├── Makefile
│ │ │ │ ├── simple.cpp
│ │ │ │ ├── simple.rc
│ │ │ │ └── sleep5.cpp
│ │ │ ├── slept/
│ │ │ │ ├── Makefile
│ │ │ │ ├── NORMAL_IA64.TXT
│ │ │ │ ├── NORMAL_X64.TXT
│ │ │ │ ├── NORMAL_X86.TXT
│ │ │ │ ├── dslept.cpp
│ │ │ │ ├── dslept.rc
│ │ │ │ ├── sleepbed.cpp
│ │ │ │ ├── sleepnew.cpp
│ │ │ │ ├── sleepold.cpp
│ │ │ │ ├── slept.cpp
│ │ │ │ ├── slept.h
│ │ │ │ ├── slept.rc
│ │ │ │ └── verify.cpp
│ │ │ ├── syelog/
│ │ │ │ ├── Makefile
│ │ │ │ ├── sltest.cpp
│ │ │ │ ├── sltestp.cpp
│ │ │ │ ├── syelog.cpp
│ │ │ │ ├── syelog.h
│ │ │ │ └── syelogd.cpp
│ │ │ ├── talloc/
│ │ │ │ ├── Makefile
│ │ │ │ ├── NORMAL_IA64.TXT
│ │ │ │ ├── NORMAL_X64.TXT
│ │ │ │ ├── talloc.cpp
│ │ │ │ ├── tdll1x.cpp
│ │ │ │ ├── tdll2x.cpp
│ │ │ │ ├── tdll3x.cpp
│ │ │ │ ├── tdll4x.cpp
│ │ │ │ ├── tdll5x.cpp
│ │ │ │ ├── tdll6x.cpp
│ │ │ │ ├── tdll7x.cpp
│ │ │ │ ├── tdll8x.cpp
│ │ │ │ └── tdll9x.cpp
│ │ │ ├── traceapi/
│ │ │ │ ├── Makefile
│ │ │ │ ├── _win32.cpp
│ │ │ │ ├── testapi.cpp
│ │ │ │ ├── trcapi.cpp
│ │ │ │ └── trcapi.rc
│ │ │ ├── tracebld/
│ │ │ │ ├── Makefile
│ │ │ │ ├── tracebld.cpp
│ │ │ │ ├── tracebld.h
│ │ │ │ ├── trcbld.cpp
│ │ │ │ └── trcbld.rc
│ │ │ ├── tracelnk/
│ │ │ │ ├── Makefile
│ │ │ │ ├── trclnk.cpp
│ │ │ │ └── trclnk.rc
│ │ │ ├── tracemem/
│ │ │ │ ├── Makefile
│ │ │ │ ├── trcmem.cpp
│ │ │ │ └── trcmem.rc
│ │ │ ├── tracereg/
│ │ │ │ ├── Makefile
│ │ │ │ ├── trcreg.cpp
│ │ │ │ └── trcreg.rc
│ │ │ ├── traceser/
│ │ │ │ ├── Makefile
│ │ │ │ ├── trcser.cpp
│ │ │ │ └── trcser.rc
│ │ │ ├── tracessl/
│ │ │ │ ├── Makefile
│ │ │ │ ├── trcssl.cpp
│ │ │ │ └── trcssl.rc
│ │ │ ├── tracetcp/
│ │ │ │ ├── Makefile
│ │ │ │ ├── trctcp.cpp
│ │ │ │ └── trctcp.rc
│ │ │ ├── tryman/
│ │ │ │ ├── Makefile
│ │ │ │ ├── managed.cs
│ │ │ │ ├── size.cpp
│ │ │ │ ├── tryman.cpp
│ │ │ │ ├── tstman.cpp
│ │ │ │ └── tstman.rc
│ │ │ └── withdll/
│ │ │ ├── Makefile
│ │ │ └── withdll.cpp
│ │ ├── src/
│ │ │ ├── Makefile
│ │ │ ├── creatwth.cpp
│ │ │ ├── detours.cpp
│ │ │ ├── detours.h
│ │ │ ├── detver.h
│ │ │ ├── disasm.cpp
│ │ │ ├── disolarm.cpp
│ │ │ ├── disolarm64.cpp
│ │ │ ├── disolia64.cpp
│ │ │ ├── disolx64.cpp
│ │ │ ├── disolx86.cpp
│ │ │ ├── image.cpp
│ │ │ ├── modules.cpp
│ │ │ └── uimports.cpp
│ │ ├── system.mak
│ │ ├── tests/
│ │ │ ├── Makefile
│ │ │ ├── catch.hpp
│ │ │ ├── corruptor.cpp
│ │ │ ├── corruptor.h
│ │ │ ├── main.cpp
│ │ │ ├── test_image_api.cpp
│ │ │ └── test_module_api.cpp
│ │ └── vc/
│ │ ├── Detours.sln
│ │ ├── Detours.vcxproj
│ │ └── Detours.vcxproj.filters
│ ├── highs-sys/
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ ├── build.rs
│ │ ├── install-dependencies.sh
│ │ ├── src/
│ │ │ ├── c_bindings.rs
│ │ │ └── lib.rs
│ │ ├── tests/
│ │ │ ├── test_highs_call.rs
│ │ │ └── test_highs_functions.rs
│ │ └── wrapper.h
│ ├── hip_runtime-sys/
│ │ ├── .rustfmt.toml
│ │ ├── Cargo.toml
│ │ ├── build.rs
│ │ └── src/
│ │ └── lib.rs
│ ├── hipblaslt-sys/
│ │ ├── .rustfmt.toml
│ │ ├── Cargo.toml
│ │ ├── build.rs
│ │ └── src/
│ │ └── lib.rs
│ ├── miopen-sys/
│ │ ├── .rustfmt.toml
│ │ ├── Cargo.toml
│ │ ├── build.rs
│ │ └── src/
│ │ └── lib.rs
│ ├── rocblas-sys/
│ │ ├── .rustfmt.toml
│ │ ├── Cargo.toml
│ │ ├── build.rs
│ │ └── src/
│ │ └── lib.rs
│ ├── rocm_smi-sys/
│ │ ├── .rustfmt.toml
│ │ ├── Cargo.toml
│ │ ├── build.rs
│ │ └── src/
│ │ └── lib.rs
│ └── rocsparse-sys/
│ ├── .rustfmt.toml
│ ├── Cargo.toml
│ ├── build.rs
│ └── src/
│ └── lib.rs
├── format/
│ ├── .rustfmt.toml
│ ├── Cargo.toml
│ └── src/
│ ├── dark_api.rs
│ ├── dnn8.rs
│ ├── dnn9.rs
│ ├── format_generated.rs
│ ├── format_generated_blas.rs
│ ├── format_generated_blaslt.rs
│ ├── format_generated_blaslt_internal.rs
│ ├── format_generated_dnn8.rs
│ ├── format_generated_dnn9.rs
│ ├── format_generated_fft.rs
│ ├── format_generated_nvml.rs
│ ├── format_generated_sparse.rs
│ └── lib.rs
├── llvm_zluda/
│ ├── Cargo.toml
│ ├── build.rs
│ └── src/
│ ├── compile.rs
│ ├── device-libs/
│ │ ├── LICENSE.TXT
│ │ ├── README.md
│ │ ├── ockl.bc
│ │ └── ocml.bc
│ ├── ffi.rs
│ ├── lib.cpp
│ ├── lib.rs
│ └── utils.rs
├── ptx/
│ ├── Cargo.toml
│ ├── lib/
│ │ ├── zluda_ptx_impl.bc
│ │ └── zluda_ptx_impl.cpp
│ └── src/
│ ├── lib.rs
│ ├── pass/
│ │ ├── deparamize_functions.rs
│ │ ├── expand_operands.rs
│ │ ├── fix_special_registers.rs
│ │ ├── hoist_globals.rs
│ │ ├── insert_explicit_load_store.rs
│ │ ├── insert_implicit_conversions.rs
│ │ ├── insert_post_saturation.rs
│ │ ├── instruction_mode_to_global_mode/
│ │ │ ├── call_with_mode.ptx
│ │ │ ├── fold_denormal.ptx
│ │ │ ├── mod.rs
│ │ │ └── test.rs
│ │ ├── llvm/
│ │ │ ├── attributes.rs
│ │ │ ├── emit.rs
│ │ │ └── mod.rs
│ │ ├── mod.rs
│ │ ├── normalize_basic_blocks.rs
│ │ ├── normalize_identifiers.rs
│ │ ├── normalize_predicates.rs
│ │ ├── remove_unreachable_basic_blocks.rs
│ │ ├── replace_instructions_with_functions.rs
│ │ ├── replace_instructions_with_functions_fp_required.rs
│ │ ├── replace_known_functions.rs
│ │ ├── resolve_function_pointers.rs
│ │ └── test/
│ │ ├── expand_operands/
│ │ │ ├── immediate_conversion.ptx
│ │ │ ├── immediates.ptx
│ │ │ ├── mod.rs
│ │ │ ├── vector_extract.ptx
│ │ │ ├── vector_operand.ptx
│ │ │ └── vector_operand_convert.ptx
│ │ ├── insert_implicit_conversions/
│ │ │ ├── default.ptx
│ │ │ ├── default_reg_b32_reg_f16x2.ptx
│ │ │ ├── default_reg_b32_reg_v2_b16.ptx
│ │ │ ├── default_relaxed.ptx
│ │ │ └── mod.rs
│ │ ├── instruction_mode_to_global_mode/
│ │ │ ├── mod.rs
│ │ │ └── mode_conflict.ptx
│ │ ├── mod.rs
│ │ └── normalize_basic_blocks/
│ │ ├── mod.rs
│ │ └── trap.ptx
│ └── test/
│ ├── _Z9vectorAddPKfS0_Pfi.ptx
│ ├── ll/
│ │ ├── _attributes.ll
│ │ ├── abs.ll
│ │ ├── activemask.ll
│ │ ├── add.ll
│ │ ├── add_extended.ll
│ │ ├── add_ftz.ll
│ │ ├── add_non_coherent.ll
│ │ ├── add_s32_sat.ll
│ │ ├── add_tuning.ll
│ │ ├── addc_cc_s32.ll
│ │ ├── and.ll
│ │ ├── assertfail.ll
│ │ ├── atom_add.ll
│ │ ├── atom_add_float.ll
│ │ ├── atom_cas.ll
│ │ ├── atom_inc.ll
│ │ ├── b64tof64.ll
│ │ ├── bar_red_and_pred.ll
│ │ ├── bench.ll
│ │ ├── bfe.ll
│ │ ├── bfi.ll
│ │ ├── block.ll
│ │ ├── bmsk_clamp_b32.ll
│ │ ├── bra.ll
│ │ ├── brev.ll
│ │ ├── call.ll
│ │ ├── call_rnd.ll
│ │ ├── clz.ll
│ │ ├── const.ll
│ │ ├── const_ident.ll
│ │ ├── constant_f32.ll
│ │ ├── constant_negative.ll
│ │ ├── copysign.ll
│ │ ├── cos.ll
│ │ ├── cp_async.ll
│ │ ├── createpolicy.ll
│ │ ├── cvt_f16x2_f32.ll
│ │ ├── cvt_f64_f32.ll
│ │ ├── cvt_pack.ll
│ │ ├── cvt_relu_f16x2_f32.ll
│ │ ├── cvt_rn_bf16x2_f32.ll
│ │ ├── cvt_rn_f16x2_e4m3x2.ll
│ │ ├── cvt_rn_f16x2_e5m2x2.ll
│ │ ├── cvt_rn_satfinite_e4m3x2_f32.ll
│ │ ├── cvt_rn_satfinite_e5m2x2_f32.ll
│ │ ├── cvt_rni.ll
│ │ ├── cvt_rni_u16_f32.ll
│ │ ├── cvt_rzi.ll
│ │ ├── cvt_s16_s8.ll
│ │ ├── cvt_s32_f32.ll
│ │ ├── cvt_s64_s32.ll
│ │ ├── cvt_sat_s_u.ll
│ │ ├── cvta.ll
│ │ ├── div_approx.ll
│ │ ├── div_ftz.ll
│ │ ├── div_noftz.ll
│ │ ├── dp2a.ll
│ │ ├── dp4a.ll
│ │ ├── ex2.ll
│ │ ├── extern_func.ll
│ │ ├── extern_shared.ll
│ │ ├── extern_shared_call.ll
│ │ ├── fma.ll
│ │ ├── fma_bf16x2.ll
│ │ ├── fma_f16x2.ll
│ │ ├── fmax.ll
│ │ ├── global_array.ll
│ │ ├── global_array_f32.ll
│ │ ├── lanemask_le.ll
│ │ ├── lanemask_lt.ll
│ │ ├── ld_st.ll
│ │ ├── ld_st_implicit.ll
│ │ ├── ld_st_offset.ll
│ │ ├── ldmatrix.ll
│ │ ├── ldmatrix_trans.ll
│ │ ├── lg2.ll
│ │ ├── local_align.ll
│ │ ├── mad_extended.ll
│ │ ├── mad_s32.ll
│ │ ├── mad_wide.ll
│ │ ├── malformed_label.ll
│ │ ├── max.ll
│ │ ├── membar.ll
│ │ ├── min.ll
│ │ ├── min_f16.ll
│ │ ├── min_nan_f16.ll
│ │ ├── mma_m16n8k16_f32_bf16_bf16_f32.ll
│ │ ├── mma_m16n8k16_f32_bf16_bf16_f32_2x.ll
│ │ ├── mma_m16n8k16_f32_f16_f16_f32.ll
│ │ ├── mma_m16n8k32_s32_s8_s8_s32.ll
│ │ ├── mma_m16n8k32_s32_s8_s8_s32_interleave.ll
│ │ ├── mov.ll
│ │ ├── mov_address.ll
│ │ ├── mul24_hi_s32.ll
│ │ ├── mul24_hi_u32.ll
│ │ ├── mul24_lo_s32.ll
│ │ ├── mul24_lo_u32.ll
│ │ ├── mul_ftz.ll
│ │ ├── mul_hi.ll
│ │ ├── mul_lo.ll
│ │ ├── mul_non_ftz.ll
│ │ ├── mul_wide.ll
│ │ ├── multiple_return.ll
│ │ ├── nanosleep.ll
│ │ ├── neg.ll
│ │ ├── non_scalar_ptr_offset.ll
│ │ ├── noreturn.ll
│ │ ├── not.ll
│ │ ├── ntid.ll
│ │ ├── or.ll
│ │ ├── param_is_addressable.ll
│ │ ├── popc.ll
│ │ ├── pred_not.ll
│ │ ├── prmt.ll
│ │ ├── prmt_slow.ll
│ │ ├── rcp.ll
│ │ ├── redux_sync_add_u32_partial.ll
│ │ ├── redux_sync_op_s32.ll
│ │ ├── redux_sync_op_u32.ll
│ │ ├── reg_local.ll
│ │ ├── reg_multi.ll
│ │ ├── rem.ll
│ │ ├── rsqrt.ll
│ │ ├── sad_s64.ll
│ │ ├── selp.ll
│ │ ├── selp_true.ll
│ │ ├── set_f16.ll
│ │ ├── setp.ll
│ │ ├── setp_gt.ll
│ │ ├── setp_leu.ll
│ │ ├── setp_nan.ll
│ │ ├── setp_num.ll
│ │ ├── shared_ptr_32.ll
│ │ ├── shared_ptr_take_address.ll
│ │ ├── shared_unify_extern.ll
│ │ ├── shared_unify_local.ll
│ │ ├── shared_variable.ll
│ │ ├── shf_l.ll
│ │ ├── shf_l_clamp.ll
│ │ ├── shf_l_wrap.ll
│ │ ├── shf_r.ll
│ │ ├── shf_r_clamp.ll
│ │ ├── shf_r_wrap.ll
│ │ ├── shfl_sync_bfly_b32_pred.ll
│ │ ├── shfl_sync_down_b32_pred.ll
│ │ ├── shfl_sync_idx_b32_pred.ll
│ │ ├── shfl_sync_mode_b32.ll
│ │ ├── shfl_sync_up_b32_pred.ll
│ │ ├── shl.ll
│ │ ├── shr.ll
│ │ ├── shr_oob.ll
│ │ ├── sign_extend.ll
│ │ ├── sin.ll
│ │ ├── sqrt.ll
│ │ ├── sqrt_rn_ftz.ll
│ │ ├── stateful_ld_st_ntid.ll
│ │ ├── stateful_ld_st_ntid_chain.ll
│ │ ├── stateful_ld_st_ntid_sub.ll
│ │ ├── stateful_ld_st_simple.ll
│ │ ├── stateful_neg_offset.ll
│ │ ├── sub.ll
│ │ ├── sub_extended.ll
│ │ ├── subc_cc_s32.ll
│ │ ├── tanh.ll
│ │ ├── tid.ll
│ │ ├── trap.ll
│ │ ├── uint_to_fp_bf16.ll
│ │ ├── vector.ll
│ │ ├── vector4.ll
│ │ ├── vector8.ll
│ │ ├── vector8_extract.ll
│ │ ├── vector_extract.ll
│ │ ├── vector_operand.ll
│ │ ├── vote_all.ll
│ │ ├── vote_all_sub.ll
│ │ ├── vote_any.ll
│ │ ├── vote_ballot.ll
│ │ ├── warp_sz.ll
│ │ └── xor.ll
│ ├── mod.rs
│ ├── operands.ptx
│ ├── spirv_build/
│ │ ├── bar_sync.ptx
│ │ ├── global_extern_array.ptx
│ │ └── param_func_array_0.ptx
│ ├── spirv_fail/
│ │ ├── const_ptr.ptx
│ │ ├── global_ptr.ptx
│ │ ├── local_ptr.txt
│ │ ├── param_entry_array_0.ptx
│ │ ├── param_vector.ptx
│ │ ├── shared_ptr.ptx
│ │ └── shared_ptr2.ptx
│ ├── spirv_run/
│ │ ├── abs.ptx
│ │ ├── activemask.ptx
│ │ ├── add.ptx
│ │ ├── add_extended.ptx
│ │ ├── add_ftz.ptx
│ │ ├── add_non_coherent.ptx
│ │ ├── add_s32_sat.ptx
│ │ ├── add_tuning.ptx
│ │ ├── addc_cc_s32.ptx
│ │ ├── and.ptx
│ │ ├── assertfail.ptx
│ │ ├── atom_add.ptx
│ │ ├── atom_add_float.ptx
│ │ ├── atom_cas.ptx
│ │ ├── atom_inc.ptx
│ │ ├── atomics_128.ptx
│ │ ├── b64tof64.ptx
│ │ ├── bar_red_and_pred.ptx
│ │ ├── bfe.ptx
│ │ ├── bfi.ptx
│ │ ├── block.ptx
│ │ ├── bmsk_clamp_b32.ptx
│ │ ├── bra.ptx
│ │ ├── brev.ptx
│ │ ├── call.ptx
│ │ ├── call_rnd.ptx
│ │ ├── clz.ptx
│ │ ├── const.ptx
│ │ ├── const_ident.ptx
│ │ ├── constant_f32.ptx
│ │ ├── constant_negative.ptx
│ │ ├── copysign.ptx
│ │ ├── cos.ptx
│ │ ├── cp_async.ptx
│ │ ├── createpolicy.ptx
│ │ ├── cvt_f16x2_f32.ptx
│ │ ├── cvt_f64_f32.ptx
│ │ ├── cvt_pack.ptx
│ │ ├── cvt_relu_f16x2_f32.ptx
│ │ ├── cvt_rn_bf16x2_f32.ptx
│ │ ├── cvt_rn_f16x2_e4m3x2.ptx
│ │ ├── cvt_rn_f16x2_e5m2x2.ptx
│ │ ├── cvt_rn_satfinite_e4m3x2_f32.ptx
│ │ ├── cvt_rn_satfinite_e5m2x2_f32.ptx
│ │ ├── cvt_rni.ptx
│ │ ├── cvt_rni_u16_f32.ptx
│ │ ├── cvt_rzi.ptx
│ │ ├── cvt_s16_s8.ptx
│ │ ├── cvt_s32_f32.ptx
│ │ ├── cvt_s64_s32.ptx
│ │ ├── cvt_sat_s_u.ptx
│ │ ├── cvta.ptx
│ │ ├── div_approx.ptx
│ │ ├── div_ftz.ptx
│ │ ├── div_noftz.ptx
│ │ ├── dp2a.ptx
│ │ ├── dp4a.ptx
│ │ ├── ex2.ptx
│ │ ├── extern_func.ptx
│ │ ├── extern_shared.ptx
│ │ ├── extern_shared_call.ptx
│ │ ├── fma.ptx
│ │ ├── fma_bf16x2.ptx
│ │ ├── fma_f16x2.ptx
│ │ ├── fmax.ptx
│ │ ├── func_ptr.ptx
│ │ ├── global_array.ptx
│ │ ├── global_array_f32.ptx
│ │ ├── implicit_param.ptx
│ │ ├── lanemask_lt.ptx
│ │ ├── ld_st.ptx
│ │ ├── ld_st_implicit.ptx
│ │ ├── ld_st_offset.ptx
│ │ ├── ldmatrix.ptx
│ │ ├── ldmatrix_trans.ptx
│ │ ├── lg2.ptx
│ │ ├── local_align.ptx
│ │ ├── mad_extended.ptx
│ │ ├── mad_s32.ptx
│ │ ├── mad_wide.ptx
│ │ ├── malformed_label.ptx
│ │ ├── max.ptx
│ │ ├── membar.ptx
│ │ ├── min.ptx
│ │ ├── min_f16.ptx
│ │ ├── min_nan_f16.ptx
│ │ ├── mma_m16n8k16_f32_bf16_bf16_f32.ptx
│ │ ├── mma_m16n8k16_f32_bf16_bf16_f32_2x.ptx
│ │ ├── mma_m16n8k16_f32_f16_f16_f32.ptx
│ │ ├── mma_m16n8k32_s32_s8_s8_s32.ptx
│ │ ├── mma_m16n8k32_s32_s8_s8_s32_interleave.ptx
│ │ ├── mod.rs
│ │ ├── mov.ptx
│ │ ├── mov_address.ptx
│ │ ├── mul24_hi_s32.ptx
│ │ ├── mul24_hi_u32.ptx
│ │ ├── mul24_lo_s32.ptx
│ │ ├── mul24_lo_u32.ptx
│ │ ├── mul_ftz.ptx
│ │ ├── mul_hi.ptx
│ │ ├── mul_lo.ptx
│ │ ├── mul_non_ftz.ptx
│ │ ├── mul_wide.ptx
│ │ ├── multiple_return.ptx
│ │ ├── nanosleep.ptx
│ │ ├── neg.ptx
│ │ ├── non_scalar_ptr_offset.ptx
│ │ ├── noreturn.ptx
│ │ ├── not.ptx
│ │ ├── ntid.ptx
│ │ ├── or.ptx
│ │ ├── param_is_addressable.ptx
│ │ ├── popc.ptx
│ │ ├── pred_not.ptx
│ │ ├── prmt.ptx
│ │ ├── prmt_slow.ptx
│ │ ├── rcp.ptx
│ │ ├── redux_sync_add_u32_partial.ptx
│ │ ├── redux_sync_op_s32.ptx
│ │ ├── redux_sync_op_u32.ptx
│ │ ├── reg_local.ptx
│ │ ├── reg_multi.ptx
│ │ ├── rem.ptx
│ │ ├── rsqrt.ptx
│ │ ├── sad_s64.ptx
│ │ ├── selp.ptx
│ │ ├── selp_true.ptx
│ │ ├── set_f16.ptx
│ │ ├── setp.ptx
│ │ ├── setp_gt.ptx
│ │ ├── setp_leu.ptx
│ │ ├── setp_nan.ptx
│ │ ├── setp_num.ptx
│ │ ├── shared_ptr_32.ptx
│ │ ├── shared_ptr_take_address.ptx
│ │ ├── shared_unify_extern.ptx
│ │ ├── shared_unify_local.ptx
│ │ ├── shared_variable.ptx
│ │ ├── shf_l.ptx
│ │ ├── shf_l_clamp.ptx
│ │ ├── shf_l_wrap.ptx
│ │ ├── shf_r.ptx
│ │ ├── shf_r_clamp.ptx
│ │ ├── shf_r_wrap.ptx
│ │ ├── shfl_sync_bfly_b32_pred.ptx
│ │ ├── shfl_sync_down_b32_pred.ptx
│ │ ├── shfl_sync_idx_b32_pred.ptx
│ │ ├── shfl_sync_mode_b32.ptx
│ │ ├── shfl_sync_up_b32_pred.ptx
│ │ ├── shl.ptx
│ │ ├── shr.ptx
│ │ ├── shr_oob.ptx
│ │ ├── sign_extend.ptx
│ │ ├── sin.ptx
│ │ ├── sqrt.ptx
│ │ ├── sqrt_rn_ftz.ptx
│ │ ├── stateful_ld_st_ntid.ptx
│ │ ├── stateful_ld_st_ntid_chain.ptx
│ │ ├── stateful_ld_st_ntid_sub.ptx
│ │ ├── stateful_ld_st_simple.ptx
│ │ ├── stateful_neg_offset.ptx
│ │ ├── sub.ptx
│ │ ├── sub_extended.ptx
│ │ ├── subc_cc_s32.ptx
│ │ ├── tanh.ptx
│ │ ├── tid.ptx
│ │ ├── trap.ptx
│ │ ├── uint_to_fp_bf16.ptx
│ │ ├── vector.ptx
│ │ ├── vector4.ptx
│ │ ├── vector8.ptx
│ │ ├── vector8_extract.ptx
│ │ ├── vector_extract.ptx
│ │ ├── vector_operand.ptx
│ │ ├── verify.py
│ │ ├── vote_all.ptx
│ │ ├── vote_all_sub.ptx
│ │ ├── vote_any.ptx
│ │ ├── vote_ballot.ptx
│ │ ├── warp_sz.ptx
│ │ └── xor.ptx
│ ├── vectorAdd_11.ptx
│ └── vectorAdd_kernel64.ptx
├── ptx_parser/
│ ├── Cargo.toml
│ └── src/
│ ├── ast.rs
│ ├── check_args.py
│ └── lib.rs
├── ptx_parser_macros/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── ptx_parser_macros_impl/
│ ├── Cargo.toml
│ └── src/
│ ├── lib.rs
│ └── parser.rs
├── ptxas/
│ ├── Cargo.toml
│ └── src/
│ └── main.rs
├── xtask/
│ ├── Cargo.toml
│ └── src/
│ └── main.rs
├── zluda/
│ ├── Cargo.toml
│ ├── build.rs
│ └── src/
│ ├── impl/
│ │ ├── context.rs
│ │ ├── device.rs
│ │ ├── driver.rs
│ │ ├── event.rs
│ │ ├── function.rs
│ │ ├── graph.rs
│ │ ├── hipfix.rs
│ │ ├── kernel.rs
│ │ ├── library.rs
│ │ ├── memory.rs
│ │ ├── mod.rs
│ │ ├── module.rs
│ │ ├── os_unix.rs
│ │ ├── os_win.rs
│ │ ├── pointer.rs
│ │ └── stream.rs
│ ├── lib.rs
│ ├── os_unix.rs
│ ├── os_win.rs
│ └── tests.rs
├── zluda_bindgen/
│ ├── Cargo.toml
│ ├── build/
│ │ ├── cublasLt_internal.h
│ │ ├── cublas_wrapper.h
│ │ ├── cuda_wrapper.h
│ │ ├── cudnn_v8/
│ │ │ ├── cudnn_adv_infer.h
│ │ │ ├── cudnn_adv_train.h
│ │ │ ├── cudnn_backend.h
│ │ │ ├── cudnn_cnn_infer.h
│ │ │ ├── cudnn_cnn_train.h
│ │ │ ├── cudnn_ops_infer.h
│ │ │ ├── cudnn_ops_train.h
│ │ │ └── cudnn_version.h
│ │ ├── cufft_wraper.h
│ │ └── decompile_cublaslt_internal.py
│ └── src/
│ ├── main.rs
│ └── process_table.rs
├── zluda_blas/
│ ├── Cargo.toml
│ ├── build.rs
│ └── src/
│ ├── impl.rs
│ ├── lib.rs
│ └── tests.rs
├── zluda_blaslt/
│ ├── Cargo.toml
│ ├── build.rs
│ └── src/
│ ├── impl.rs
│ └── lib.rs
├── zluda_cache/
│ ├── Cargo.toml
│ ├── diesel.toml
│ ├── migrations/
│ │ ├── .keep
│ │ └── 2025-08-04-203347_create_initial/
│ │ ├── down.sql
│ │ └── up.sql
│ └── src/
│ ├── lib.rs
│ ├── models.rs
│ └── schema.rs
├── zluda_common/
│ ├── Cargo.toml
│ └── src/
│ ├── constants.rs
│ ├── lib.rs
│ ├── os_unix.rs
│ └── os_win.rs
├── zluda_dnn/
│ ├── Cargo.toml
│ └── src/
│ ├── impl.rs
│ └── lib.rs
├── zluda_dnn8/
│ ├── Cargo.toml
│ ├── build.rs
│ └── src/
│ └── lib.rs
├── zluda_dnn9/
│ ├── Cargo.toml
│ ├── build.rs
│ └── src/
│ ├── lib.rs
│ └── tests.rs
├── zluda_fft/
│ ├── Cargo.toml
│ └── src/
│ ├── impl.rs
│ └── lib.rs
├── zluda_inject/
│ ├── Cargo.toml
│ ├── build.rs
│ ├── src/
│ │ ├── args.rs
│ │ ├── bin.rs
│ │ ├── main.rs
│ │ └── win.rs
│ └── tests/
│ ├── helpers/
│ │ ├── direct_cuinit.rs
│ │ ├── do_cuinit.rs
│ │ ├── do_cuinit_early.rs
│ │ ├── do_cuinit_late.rs
│ │ ├── do_cuinit_late_clr.cs
│ │ ├── indirect_cuinit.rs
│ │ └── subprocess.rs
│ └── inject.rs
├── zluda_ld/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── zluda_ml/
│ ├── Cargo.toml
│ └── src/
│ ├── impl_common.rs
│ ├── impl_unix.rs
│ ├── impl_win.rs
│ └── lib.rs
├── zluda_precompile/
│ ├── Cargo.toml
│ └── src/
│ └── main.rs
├── zluda_redirect/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── zluda_sparse/
│ ├── Cargo.toml
│ ├── build.rs
│ └── src/
│ ├── impl.rs
│ └── lib.rs
├── zluda_trace/
│ ├── Cargo.toml
│ └── src/
│ ├── dark_api.rs
│ ├── lib.rs
│ ├── log.rs
│ ├── os_unix.rs
│ ├── os_win.rs
│ └── trace.rs
├── zluda_trace_blas/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── zluda_trace_blaslt/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── zluda_trace_common/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── zluda_trace_dnn8/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── zluda_trace_dnn9/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── zluda_trace_fft/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── zluda_trace_nvml/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
├── zluda_trace_sparse/
│ ├── Cargo.toml
│ └── src/
│ └── lib.rs
└── zluda_windows/
├── Cargo.toml
├── library.manifest
├── manifest.rc
└── src/
└── lib.rs
Copy disabled (too large)
Download .json
Condensed preview — 871 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (16,836K chars).
[
{
"path": ".cargo/config.toml",
"chars": 124,
"preview": "[alias]\r\nxtask = \"run --package xtask --\"\r\n\r\n[target.x86_64-pc-windows-msvc]\r\nrustflags = [\"-Ctarget-feature=+crt-static"
},
{
"path": ".devcontainer/Dockerfile",
"chars": 2553,
"preview": "FROM nvidia/cuda:13.0.1-base-ubuntu24.04\n\nRUN DEBIAN_FRONTEND=noninteractive apt update -y && DEBIAN_FRONTEND=noninterac"
},
{
"path": ".devcontainer/devcontainer.json",
"chars": 843,
"preview": "// For format details, see https://aka.ms/devcontainer.json. For config options, see the\n// README at: https://github.co"
},
{
"path": ".git-blame-ignore-revs",
"chars": 41,
"preview": "21ef5f60a3a5efa17855a30f6b5c7d1968cd46ba\n"
},
{
"path": ".gitattributes",
"chars": 108,
"preview": "ext/** linguist-vendored\n*.dll filter=lfs diff=lfs merge=lfs -text\n*.bc filter=lfs diff=lfs merge=lfs -text\n"
},
{
"path": ".github/ISSUE_TEMPLATE/config.yml",
"chars": 27,
"preview": "blank_issues_enabled: true\n"
},
{
"path": ".github/ISSUE_TEMPLATE/zluda_dump.yml",
"chars": 2571,
"preview": "name: Bug Report\ndescription: Report an issue with ZLUDA\nbody:\n - type: markdown\n attributes:\n value: |\n "
},
{
"path": ".github/workflows/move_tests.sh",
"chars": 348,
"preview": "#!/bin/bash\nset -ex\nTEST_EXECUTABLES_DIR=$1\nSUFFIX=$2\n\nls ${TEST_EXECUTABLES_DIR}/* | sort -u | while read -r executable"
},
{
"path": ".github/workflows/nightly_tests.yml",
"chars": 1767,
"preview": "name: Nightly tests\non:\n workflow_call:\n workflow_dispatch:\n\nenv:\n ROCM_VERSION: \"6.3.4\"\n AMDGPU_VERSION: \"6.4.4\"\n "
},
{
"path": ".github/workflows/pr_master.yml",
"chars": 4907,
"preview": "name: ZLUDA\non:\n pull_request:\n branches: [ master ]\n\nenv:\n CARGO_TERM_COLOR: always\n CARGO_PROFILE: release\n SCC"
},
{
"path": ".github/workflows/push_master.yml",
"chars": 6229,
"preview": "name: ZLUDA\non:\n workflow_dispatch:\n push:\n branches: [ master ]\n\nenv:\n CARGO_TERM_COLOR: always\n CARGO_PROFILE: "
},
{
"path": ".github/workflows/rocm_setup_build.sh",
"chars": 1058,
"preview": "#!/bin/bash\nset -ex\nROCM_VERSION=$1\n\nDEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends gpg patchelf\n"
},
{
"path": ".github/workflows/rocm_setup_run.sh",
"chars": 1607,
"preview": "#!/bin/bash\nset -ex\nROCM_VERSION=$1\nAMDGPU_VERSION=$2\n\nDEBIAN_FRONTEND=noninteractive apt install -y --no-install-recomm"
},
{
"path": ".github/workflows/trigger_nightly_tests.yml",
"chars": 801,
"preview": "name: Trigger nightly tests\non:\n schedule:\n - cron: \"0 8 * * *\"\n\njobs:\n check_last_nightly_run:\n runs-on: 'ubunt"
},
{
"path": ".gitignore",
"chars": 63,
"preview": "target/\nCargo.lock\n\n.vscode/\n.idea/\n\nptx/lib/zluda_ptx_impl.ll\n"
},
{
"path": ".gitmodules",
"chars": 240,
"preview": "[submodule \"ext/llvm-project\"]\n\tpath = ext/llvm-project\n\turl = https://github.com/vosen/llvm-project.git\n\tbranch = main\n"
},
{
"path": ".rustfmt.toml",
"chars": 23,
"preview": "newline_style = \"Unix\"\n"
},
{
"path": "Cargo.toml",
"chars": 1524,
"preview": "[workspace]\n\nresolver = \"2\"\n\nmembers = [\n \"cuda_check\",\n \"cuda_macros\",\n \"cuda_types\",\n \"dark_api\",\n \"det"
},
{
"path": "LICENSE-APACHE",
"chars": 9723,
"preview": " Apache License\n Version 2.0, January 2004\n http"
},
{
"path": "LICENSE-MIT",
"chars": 1023,
"preview": "Permission is hereby granted, free of charge, to any\nperson obtaining a copy of this software and associated\ndocumentati"
},
{
"path": "README.md",
"chars": 718,
"preview": "ZLUDA is a drop-in replacement for CUDA on non-NVIDIA GPUs. ZLUDA allows running unmodified CUDA applications using non-"
},
{
"path": "compiler/Cargo.toml",
"chars": 471,
"preview": "[package]\nname = \"compiler\"\ndescription = \"ZLUDA offline compiler\"\nversion = \"0.0.0\"\nauthors = [\"Joëlle van Essen <joell"
},
{
"path": "compiler/src/error.rs",
"chars": 1805,
"preview": "use ptx::TranslateError;\nuse ptx_parser::PtxError;\nuse std::ffi::FromBytesUntilNulError;\nuse std::io;\nuse std::str::Utf8"
},
{
"path": "compiler/src/main.rs",
"chars": 19415,
"preview": "use bpaf::Bpaf;\nuse error::CompilerError;\nuse std::ffi::CStr;\nuse std::fs::{self, File};\nuse std::io::{self, Write};\nuse"
},
{
"path": "cuda_check/Cargo.toml",
"chars": 579,
"preview": "[package]\nname = \"cuda_check\"\nversion = \"0.0.0\"\nauthors = [\"Andrzej Janik <vosen@vosen.pl>\"]\nedition = \"2021\"\n\n[[bin]]\nn"
},
{
"path": "cuda_check/src/main.rs",
"chars": 107,
"preview": "#[cfg(windows)]\nmod win;\n\n#[cfg(windows)]\nfn main() {\n win::main()\n}\n\n#[cfg(not(windows))]\nfn main() {}\n"
},
{
"path": "cuda_check/src/win.rs",
"chars": 10902,
"preview": "use bpaf::{construct, pure, Bpaf, Parser};\nuse owo_colors::{OwoColorize, Stream};\nuse rand::seq::SliceRandom;\nuse std::{"
},
{
"path": "cuda_macros/.rustfmt.toml",
"chars": 30,
"preview": "disable_all_formatting = true\n"
},
{
"path": "cuda_macros/Cargo.toml",
"chars": 283,
"preview": "[package]\nname = \"cuda_macros\"\nversion = \"0.0.0\"\nauthors = [\"Andrzej Janik <vosen@vosen.pl>\"]\nedition = \"2021\"\n\n[depende"
},
{
"path": "cuda_macros/build/wrapper.h",
"chars": 169,
"preview": "#define __CUDA_API_VERSION_INTERNAL\n#include <cuda.h>\n#include <cudaProfiler.h>\n#include <cudaGL.h>\n#include <cudaEGL.h>"
},
{
"path": "cuda_macros/src/cublas.rs",
"chars": 245364,
"preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\nextern \"system\" {\n #[must_use"
},
{
"path": "cuda_macros/src/cublaslt.rs",
"chars": 30240,
"preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\nextern \"system\" {\n #[must_use"
},
{
"path": "cuda_macros/src/cublaslt_internal.rs",
"chars": 68573,
"preview": "/* automatically generated by rust-bindgen 0.70.1 */\n\nextern \"system\" {\n fn cublasLtShutdownCtx(param_1: ::core::ffi:"
},
{
"path": "cuda_macros/src/cuda.rs",
"chars": 917710,
"preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\nextern \"system\" {\n /** \\brief"
},
{
"path": "cuda_macros/src/cudnn8.rs",
"chars": 118378,
"preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\nextern \"system\" {\n fn cudnnGe"
},
{
"path": "cuda_macros/src/cudnn9.rs",
"chars": 94345,
"preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\nextern \"system\" {\n fn cudnnGe"
},
{
"path": "cuda_macros/src/cufft.rs",
"chars": 15917,
"preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\nextern \"system\" {\n fn cufftPl"
},
{
"path": "cuda_macros/src/cusparse.rs",
"chars": 238716,
"preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\nextern \"system\" {\n #[must_use"
},
{
"path": "cuda_macros/src/lib.rs",
"chars": 12261,
"preview": "extern crate proc_macro;\n\nuse proc_macro::TokenStream;\nuse proc_macro2::Span;\nuse quote::{format_ident, quote, ToTokens}"
},
{
"path": "cuda_macros/src/nvml.rs",
"chars": 517441,
"preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\nextern \"system\" {\n #[must_use"
},
{
"path": "cuda_types/.rustfmt.toml",
"chars": 30,
"preview": "disable_all_formatting = true\n"
},
{
"path": "cuda_types/Cargo.toml",
"chars": 514,
"preview": "[package]\nname = \"cuda_types\"\nversion = \"0.0.0\"\nauthors = [\"Andrzej Janik <vosen@vosen.pl>\"]\nedition = \"2021\"\n\n[dependen"
},
{
"path": "cuda_types/src/cublas.rs",
"chars": 14296,
"preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\npub type __half = u16;\npub type "
},
{
"path": "cuda_types/src/cublaslt.rs",
"chars": 162732,
"preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\npub type __half = u16;\npub type "
},
{
"path": "cuda_types/src/cuda.rs",
"chars": 381613,
"preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\npub const CUDA_VERSION: u32 = 13"
},
{
"path": "cuda_types/src/cudnn.rs",
"chars": 48985,
"preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\n#[repr(C)]\n#[derive(Debug, Copy,"
},
{
"path": "cuda_types/src/cudnn8.rs",
"chars": 25888,
"preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\npub type __half = u16;\npub type "
},
{
"path": "cuda_types/src/cudnn9.rs",
"chars": 97036,
"preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\npub type __half = u16;\npub type "
},
{
"path": "cuda_types/src/cufft.rs",
"chars": 17377,
"preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\npub type __half = u16;\npub type "
},
{
"path": "cuda_types/src/cusparse.rs",
"chars": 21285,
"preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\npub type __half = u16;\npub type "
},
{
"path": "cuda_types/src/dark_api.rs",
"chars": 2623,
"preview": "use bitflags::bitflags;\nuse std::ffi::{c_uint, c_ulonglong, c_ushort, c_void};\n\n/*\nfat_cubin:\ntypedef struct {\n int mag"
},
{
"path": "cuda_types/src/lib.rs",
"chars": 178,
"preview": "pub enum FILE {}\n\npub mod cublas;\npub mod cublaslt;\npub mod cuda;\npub mod cudnn;\npub mod cudnn8;\npub mod cudnn9;\npub mod"
},
{
"path": "cuda_types/src/nvml.rs",
"chars": 237063,
"preview": "// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\npub type __half = u16;\npub type "
},
{
"path": "dark_api/Cargo.toml",
"chars": 283,
"preview": "[package]\nname = \"dark_api\"\nversion = \"0.0.0\"\nedition = \"2021\"\n\n[dependencies]\ncuda_types = { path = \"../cuda_types\" }\nf"
},
{
"path": "dark_api/src/fatbin.rs",
"chars": 9978,
"preview": "// This file contains a higher-level interface for parsing fatbins\n\nuse std::{borrow::Cow, ptr};\n\nuse cuda_types::dark_a"
},
{
"path": "dark_api/src/lib.rs",
"chars": 26570,
"preview": "use std::ffi::c_void;\n\nuse cuda_types::cuda::CUuuid;\n\npub mod fatbin;\n\nmacro_rules! dark_api_init {\n (SIZE_OF, $table"
},
{
"path": "detours-sys/Cargo.toml",
"chars": 801,
"preview": "[package]\nname = \"detours-sys\"\nversion = \"0.1.2\"\nauthors = [\"Diana <5275194+DianaNites@users.noreply.github.com>\"]\nediti"
},
{
"path": "detours-sys/LICENSE-APACHE",
"chars": 11358,
"preview": "\n Apache License\n Version 2.0, January 2004\n "
},
{
"path": "detours-sys/LICENSE-MIT",
"chars": 1045,
"preview": "Copyright 2019 Diana\n\nPermission is hereby granted, free of charge, to any person obtaining a copy of this software and "
},
{
"path": "detours-sys/README.md",
"chars": 1083,
"preview": "# Detours-sys\n\n[](https://crates.io/crates/detours-sys)\n![m"
},
{
"path": "detours-sys/build/wrapper.h",
"chars": 42,
"preview": "#include <windows.h>\n#include <detours.h>\n"
},
{
"path": "detours-sys/build.rs",
"chars": 1346,
"preview": "#[cfg(not(target_os = \"windows\"))]\nfn main() {}\n\n#[cfg(target_os = \"windows\")]\nfn main() -> Result<(), Box<dyn std::erro"
},
{
"path": "detours-sys/src/bundled_bindings.rs",
"chars": 31903,
"preview": "/* automatically generated by rust-bindgen 0.56.0 */\n\npub type wchar_t = ::std::os::raw::c_ushort;\npub type ULONG = ::st"
},
{
"path": "detours-sys/src/lib.rs",
"chars": 2348,
"preview": "#![cfg(target_os = \"windows\")]\n\n//! Bindings to the Microsoft Detours API.\n#![allow(non_camel_case_types)]\n#![allow(non_"
},
{
"path": "docs/.gitignore",
"chars": 5,
"preview": "book\n"
},
{
"path": "docs/.readthedocs.yaml",
"chars": 237,
"preview": "version: 2\nbuild:\n os: ubuntu-lts-latest\n tools:\n rust: latest\n jobs:\n install:\n - cargo install mdbook@0."
},
{
"path": "docs/book.toml",
"chars": 79,
"preview": "[book]\nauthors = [\"Andrzej Janik\"]\nlanguage = \"en\"\nsrc = \"src\"\ntitle = \"ZLUDA\"\n"
},
{
"path": "docs/src/SUMMARY.md",
"chars": 335,
"preview": "# Summary\n\n# General\n- [Quick start](./quick_start.md)\n- [Installing HIP SDK](./hip_sdk.md)\n- [FAQ](./faq.md)\n\n# Trouble"
},
{
"path": "docs/src/building.md",
"chars": 614,
"preview": "# Building\n\n## Dependencies\n\n* Git\n* CMake\n* Python 3\n* Rust compiler (recent version)\n* C++ compiler\n* (Linux only) HIP"
},
{
"path": "docs/src/faq.md",
"chars": 4275,
"preview": "# FAQ\n\n> [!WARNING]\n> For legal reasons we can't help you with the pre-rollback versions (older than 4). See more here: "
},
{
"path": "docs/src/hip_sdk.md",
"chars": 3158,
"preview": "# Installing HIP SDK\n\nOn Windows, in addition to installing the GPU driver, you need to install the HIP SDK. Choose one "
},
{
"path": "docs/src/llama_cpp.md",
"chars": 472,
"preview": "# llama.cpp\n\nllama.cpp runs at native speed when compiled for CUDA architecture 86 and with cuBLAS enabled:\n```\ncmake -B"
},
{
"path": "docs/src/precompiling.md",
"chars": 738,
"preview": "# Precompiling\n\nConsider precompiling the GPU code with `zluda_precompile` if you are trying to run a large application."
},
{
"path": "docs/src/quick_start.md",
"chars": 1799,
"preview": "# Quick start\n\n> [!WARNING]\n> This version of ZLUDA is under heavy development and will likely not work with your applic"
},
{
"path": "docs/src/troubleshooting.md",
"chars": 8600,
"preview": "# Logging\n\n## Introduction\n\nzluda_trace is a [shim](https://en.wikipedia.org/wiki/Shim_(computing))\nfor the CUDA API whi"
},
{
"path": "ext/detours/.github/ISSUE_TEMPLATE/bug-report.md",
"chars": 1990,
"preview": "---\r\nname: Bug Report\r\nabout: Report a bug in Detours\r\ntitle: \"<header>: Problem\"\r\nlabels: 'bug'\r\nassignees: ''\r\n\r\n---\r\n"
},
{
"path": "ext/detours/.github/ISSUE_TEMPLATE/question.md",
"chars": 1382,
"preview": "---\r\nname: Question\r\nabout: Ask a question about Detours\r\ntitle: \"\"\r\nlabels: question\r\nassignees: ''\r\n\r\n---\r\n\r\nInstructi"
},
{
"path": "ext/detours/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md",
"chars": 493,
"preview": "<!--\r\nBefore submitting a pull request, please ensure that:\r\n\r\n* These changes introduce no known API breaks (changing t"
},
{
"path": "ext/detours/.github/codeql/codeql-config.yml",
"chars": 108,
"preview": "---\r\nname: \"Detours CodeQL Config\"\r\n\r\nqueries:\r\n - uses: security-and-quality\r\n - uses: security-extended \r\n"
},
{
"path": "ext/detours/.github/workflows/main.yml",
"chars": 2466,
"preview": "name: CI-Build\n\nenv:\n # Turn on msvc analysis during build, enable once warnings are clean.\n DETOURS_ANALYZE: true\n\n "
},
{
"path": "ext/detours/.gitignore",
"chars": 424,
"preview": "# C extensions\r\n*.so\r\n\r\n# Unit test / coverage reports\r\n.coverage\r\n.tox\r\nnosetests.xml\r\n\r\n# Translations\r\n*.mo\r\n\r\n# Mr D"
},
{
"path": "ext/detours/CREDITS.TXT",
"chars": 4987,
"preview": "==============================================================================\nThe following individuals have helped ide"
},
{
"path": "ext/detours/LICENSE.md",
"chars": 1099,
"preview": "# Copyright (c) Microsoft Corporation\n\nAll rights reserved.\n\n# MIT License\n\nPermission is hereby granted, free of charge"
},
{
"path": "ext/detours/Makefile",
"chars": 2065,
"preview": "##############################################################################\n##\n## Makefile for Detours.\n##\n## Micro"
},
{
"path": "ext/detours/README.md",
"chars": 3177,
"preview": "# Microsoft Research Detours Package\n\nDetours is a software package for monitoring and instrumenting API calls on Window"
},
{
"path": "ext/detours/samples/Makefile",
"chars": 9833,
"preview": "##############################################################################\n##\n## Makefile for Detours Test Programs"
},
{
"path": "ext/detours/samples/README.TXT",
"chars": 2614,
"preview": "##############################################################################\n##\n## Samples README File\n##\n## Microso"
},
{
"path": "ext/detours/samples/comeasy/Makefile",
"chars": 3913,
"preview": "##############################################################################\n##\n## API Extension to Measure time slep"
},
{
"path": "ext/detours/samples/comeasy/comeasy.cpp",
"chars": 1450,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detour Test Program (comeasy.cpp o"
},
{
"path": "ext/detours/samples/comeasy/wrotei.cpp",
"chars": 5104,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detour Test Program (wrotei.cpp of"
},
{
"path": "ext/detours/samples/comeasy/wrotei.rc",
"chars": 560,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Version information for wrotei.rc."
},
{
"path": "ext/detours/samples/commem/Makefile",
"chars": 1198,
"preview": "##############################################################################\n##\n## Makefile for Detours Test Programs"
},
{
"path": "ext/detours/samples/commem/commem.cpp",
"chars": 3243,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detour functions of a COM interfac"
},
{
"path": "ext/detours/samples/common.mak",
"chars": 2015,
"preview": "##############################################################################\n##\n## Common makefile for Detours test p"
},
{
"path": "ext/detours/samples/cping/Makefile",
"chars": 3424,
"preview": "##############################################################################\n##\n## Makefile for Detours Test Programs"
},
{
"path": "ext/detours/samples/cping/ReadMe.Txt",
"chars": 1524,
"preview": "Microsoft Research Detours Package\n==============================================================================\n4/2/98"
},
{
"path": "ext/detours/samples/cping/cping.cpp",
"chars": 65027,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Module: cping.cpp (cping.exe)\n//\n/"
},
{
"path": "ext/detours/samples/cping/iping.idl",
"chars": 681,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Module: iping.idl (cping.exe - COM"
},
{
"path": "ext/detours/samples/disas/Makefile",
"chars": 2081,
"preview": "##############################################################################\n##\n## Makefile for Detours Test Programs"
},
{
"path": "ext/detours/samples/disas/arm.asm",
"chars": 6759,
"preview": ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n;;\n;; Detours Test Program (rlo.asm/disas"
},
{
"path": "ext/detours/samples/disas/disas.cpp",
"chars": 20476,
"preview": "/////////////////////////////////////////////////////////////////////////////\n//\n// Module: disas.cpp (disas.exe - Deto"
},
{
"path": "ext/detours/samples/disas/ia64.asm",
"chars": 28061,
"preview": "/////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (ia64.asm/disa"
},
{
"path": "ext/detours/samples/disas/unk.cpp",
"chars": 309,
"preview": "/////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (x86.asm of di"
},
{
"path": "ext/detours/samples/disas/x64.asm",
"chars": 16917,
"preview": ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n;;\n;; Detours Test Program (x64.asm/disas"
},
{
"path": "ext/detours/samples/disas/x86.cpp",
"chars": 5082,
"preview": "/////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (x86.asm of di"
},
{
"path": "ext/detours/samples/dtest/Makefile",
"chars": 3003,
"preview": "##############################################################################\n##\n## Makefile for Detours Test Programs"
},
{
"path": "ext/detours/samples/dtest/NORMAL_IA64.TXT",
"chars": 5329,
"preview": " ..\\..\\bin.IA64\\dtest.exe\nCalling LocalTarget1 w/o detour\n LocalTarget1 (1)\nCalling LocalTarget1 w/ detour\n MyLocal"
},
{
"path": "ext/detours/samples/dtest/NORMAL_X64.TXT",
"chars": 5328,
"preview": " ..\\..\\bin.X64\\dtest.exe\nCalling LocalTarget1 w/o detour\n LocalTarget1 (1)\nCalling LocalTarget1 w/ detour\n MyLocalT"
},
{
"path": "ext/detours/samples/dtest/NORMAL_X86.TXT",
"chars": 5328,
"preview": " ..\\..\\bin.X86\\dtest.exe\nCalling LocalTarget1 w/o detour\n LocalTarget1 (1)\nCalling LocalTarget1 w/ detour\n MyLocalT"
},
{
"path": "ext/detours/samples/dtest/dtarge.cpp",
"chars": 11010,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (dtarge.dll)\n"
},
{
"path": "ext/detours/samples/dtest/dtarge.h",
"chars": 3486,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (dtarge.h of "
},
{
"path": "ext/detours/samples/dtest/dtarge.rc",
"chars": 556,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Version information for dtarge.rc."
},
{
"path": "ext/detours/samples/dtest/dtest.cpp",
"chars": 26200,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (dtest.cpp of"
},
{
"path": "ext/detours/samples/dumpe/Makefile",
"chars": 1329,
"preview": "##############################################################################\n##\n## Makefile for Detours Test Programs"
},
{
"path": "ext/detours/samples/dumpe/dumpe.cpp",
"chars": 3294,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (dumpe.cpp of"
},
{
"path": "ext/detours/samples/dumpi/Makefile",
"chars": 1204,
"preview": "##############################################################################\n##\n## Makefile for Detours Test Programs"
},
{
"path": "ext/detours/samples/dumpi/dumpi.cpp",
"chars": 7575,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (dumpi.cpp of"
},
{
"path": "ext/detours/samples/dynamic_alloc/Makefile",
"chars": 1814,
"preview": "##############################################################################\n##\n## Makefile for Detours Test Programs"
},
{
"path": "ext/detours/samples/dynamic_alloc/main.cpp",
"chars": 5738,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program\n//\n// Micros"
},
{
"path": "ext/detours/samples/dynamic_alloc/x64.asm",
"chars": 403,
"preview": ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n;;\n;; Detours Test Program\n;;\n;; Microso"
},
{
"path": "ext/detours/samples/dynamic_alloc/x86.asm",
"chars": 433,
"preview": ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n;;\n;; Detours Test Program\n;;\n;; Microso"
},
{
"path": "ext/detours/samples/echo/Makefile",
"chars": 3259,
"preview": "##############################################################################\n##\n## Detours Test Program\n##\n## Micros"
},
{
"path": "ext/detours/samples/echo/echofx.cpp",
"chars": 1538,
"preview": "//\n//\n//\n#include <windows.h>\n#include <detours.h>\n#include <stdio.h>\n\nint WINAPI Echo(PCSTR pszMsg);\n\nstatic int (WINAP"
},
{
"path": "ext/detours/samples/echo/echofx.rc",
"chars": 569,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Version information for echofx.rc."
},
{
"path": "ext/detours/samples/echo/echonul.cpp",
"chars": 187,
"preview": "//\n//\n//\n#include <windows.h>\n\nint WINAPI Echo(PCSTR pszMsg)\n{\n int sum = 0;\n while (*pszMsg) {\n sum = sum "
},
{
"path": "ext/detours/samples/echo/main.cpp",
"chars": 459,
"preview": "//\n//\n//\n#include <windows.h>\n\nint WINAPI Echo(PCSTR pszMsg);\n\nextern \"C\" int __stdcall mainCRTStartup(HINSTANCE hInstan"
},
{
"path": "ext/detours/samples/einst/Makefile",
"chars": 4813,
"preview": "##############################################################################\n##\n## Makefile for Detours Test Programs"
},
{
"path": "ext/detours/samples/einst/edll1x.cpp",
"chars": 1312,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (edll1x.cpp o"
},
{
"path": "ext/detours/samples/einst/edll2x.cpp",
"chars": 1323,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (edll2x.cpp o"
},
{
"path": "ext/detours/samples/einst/edll3x.cpp",
"chars": 1910,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (edll3x.cpp o"
},
{
"path": "ext/detours/samples/einst/einst.cpp",
"chars": 2375,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (einst.cpp of"
},
{
"path": "ext/detours/samples/excep/Makefile",
"chars": 1242,
"preview": "##############################################################################\n##\n## Makefile for Detours Test Programs"
},
{
"path": "ext/detours/samples/excep/excep.cpp",
"chars": 3824,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// First Chance Exception Handling Te"
},
{
"path": "ext/detours/samples/excep/firstexc.cpp",
"chars": 6642,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (firstexc.cpp"
},
{
"path": "ext/detours/samples/excep/firstexc.h",
"chars": 609,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (firstexc.h o"
},
{
"path": "ext/detours/samples/findfunc/Makefile",
"chars": 6218,
"preview": "##############################################################################\n##\n## Program to test DetourFindFunction"
},
{
"path": "ext/detours/samples/findfunc/extend.cpp",
"chars": 5015,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detour Test Program (extend.cpp of"
},
{
"path": "ext/detours/samples/findfunc/extend.rc",
"chars": 577,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Version information for extend.rc."
},
{
"path": "ext/detours/samples/findfunc/findfunc.cpp",
"chars": 955,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detour Test Program (findfunc.cpp "
},
{
"path": "ext/detours/samples/findfunc/symtest.cpp",
"chars": 13459,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detour Test Program (symtest.cpp o"
},
{
"path": "ext/detours/samples/findfunc/target.cpp",
"chars": 1044,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detour Test Program (target.cpp of"
},
{
"path": "ext/detours/samples/findfunc/target.h",
"chars": 375,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detour Test Program (target.h of t"
},
{
"path": "ext/detours/samples/findfunc/target.rc",
"chars": 556,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Version information for target.rc."
},
{
"path": "ext/detours/samples/impmunge/Makefile",
"chars": 1709,
"preview": "##############################################################################\n##\n## Makefile for Detours Test Programs"
},
{
"path": "ext/detours/samples/impmunge/impmunge.cpp",
"chars": 13492,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (impmunge.cpp"
},
{
"path": "ext/detours/samples/member/Makefile",
"chars": 1176,
"preview": "##############################################################################\n##\n## Makefile for Detours Test Programs"
},
{
"path": "ext/detours/samples/member/member.cpp",
"chars": 3777,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Test a detour of a member function"
},
{
"path": "ext/detours/samples/opengl/Makefile",
"chars": 3710,
"preview": "######################################################################\n##\n## Hook test for glFinish\n##\n\n!include ..\\com"
},
{
"path": "ext/detours/samples/opengl/ogldet.cpp",
"chars": 2053,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Module: ogldet.dll\n//\n// This"
},
{
"path": "ext/detours/samples/opengl/ogldet.rc",
"chars": 564,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Version information for ogldet.rc."
},
{
"path": "ext/detours/samples/opengl/testogl.cpp",
"chars": 470,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// File: testogl.cpp\n// Module"
},
{
"path": "ext/detours/samples/region/Makefile",
"chars": 1176,
"preview": "##############################################################################\n##\n## Makefile for Detours Test Programs"
},
{
"path": "ext/detours/samples/region/region.cpp",
"chars": 2926,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Test the different system region b"
},
{
"path": "ext/detours/samples/setdll/Makefile",
"chars": 1916,
"preview": "##############################################################################\n##\n## Makefile for Detours Test Programs"
},
{
"path": "ext/detours/samples/setdll/setdll.cpp",
"chars": 9684,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (setdll.cpp o"
},
{
"path": "ext/detours/samples/simple/Makefile",
"chars": 3885,
"preview": "##############################################################################\n##\n## API Extention to Measure time slep"
},
{
"path": "ext/detours/samples/simple/simple.cpp",
"chars": 2322,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (simple.cpp o"
},
{
"path": "ext/detours/samples/simple/simple.rc",
"chars": 556,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Version information for simple.rc."
},
{
"path": "ext/detours/samples/simple/sleep5.cpp",
"chars": 643,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (sleep5.cpp o"
},
{
"path": "ext/detours/samples/slept/Makefile",
"chars": 7026,
"preview": "##############################################################################\n##\n## API Extension to Measure time slep"
},
{
"path": "ext/detours/samples/slept/NORMAL_IA64.TXT",
"chars": 8721,
"preview": "-------- Reseting test binaries to initial state. -----------------------\n ..\\..\\bin.IA64\\setdll.exe -r ..\\..\\bin.IA6"
},
{
"path": "ext/detours/samples/slept/NORMAL_X64.TXT",
"chars": 8248,
"preview": "-------- Reseting test binaries to initial state. -----------------------\n ..\\..\\bin.X64\\setdll.exe -r ..\\..\\bin.X64\\"
},
{
"path": "ext/detours/samples/slept/NORMAL_X86.TXT",
"chars": 6930,
"preview": "-------- Reseting test binaries to initial state. -----------------------\n ..\\..\\bin.X86\\setdll.exe -r ..\\..\\bin.X86\\"
},
{
"path": "ext/detours/samples/slept/dslept.cpp",
"chars": 3992,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detour Test Program (dslept.cpp of"
},
{
"path": "ext/detours/samples/slept/dslept.rc",
"chars": 570,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Version information for dslept.rc."
},
{
"path": "ext/detours/samples/slept/sleepbed.cpp",
"chars": 2803,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detour Test Program (sleepbed.cpp "
},
{
"path": "ext/detours/samples/slept/sleepnew.cpp",
"chars": 2366,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detour Test Program (sleepnew.cpp "
},
{
"path": "ext/detours/samples/slept/sleepold.cpp",
"chars": 1767,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detour Test Program (sleepold.cpp "
},
{
"path": "ext/detours/samples/slept/slept.cpp",
"chars": 3330,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detour Test Program (slept.cpp of "
},
{
"path": "ext/detours/samples/slept/slept.h",
"chars": 572,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detour Test Program (slept.h of sl"
},
{
"path": "ext/detours/samples/slept/slept.rc",
"chars": 559,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Version information for sleep.rc.\n"
},
{
"path": "ext/detours/samples/slept/verify.cpp",
"chars": 1919,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detour Test Program (verify.cpp)\n/"
},
{
"path": "ext/detours/samples/syelog/Makefile",
"chars": 2990,
"preview": "##############################################################################\n##\n## Makefile for Detours.\n##\n## Micro"
},
{
"path": "ext/detours/samples/syelog/sltest.cpp",
"chars": 4408,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (sltest.cpp o"
},
{
"path": "ext/detours/samples/syelog/sltestp.cpp",
"chars": 3357,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (sltestp.cpp "
},
{
"path": "ext/detours/samples/syelog/syelog.cpp",
"chars": 29209,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (syelog.cpp o"
},
{
"path": "ext/detours/samples/syelog/syelog.h",
"chars": 3534,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (syelog.h of "
},
{
"path": "ext/detours/samples/syelog/syelogd.cpp",
"chars": 17253,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (syelogd.cpp "
},
{
"path": "ext/detours/samples/talloc/Makefile",
"chars": 12487,
"preview": "##############################################################################\n##\n## Makefile for Detours Test Programs"
},
{
"path": "ext/detours/samples/talloc/NORMAL_IA64.TXT",
"chars": 3549,
"preview": "talloc.exe: Detoured functions.\n\n Address Size: Typ Sta Prot Ini : Contents\n ------------ ------------: -"
},
{
"path": "ext/detours/samples/talloc/NORMAL_X64.TXT",
"chars": 3353,
"preview": "talloc.exe: Detoured functions.\n\n Address Size: Typ Sta Prot Ini : Contents\n ------------ ------------: -"
},
{
"path": "ext/detours/samples/talloc/talloc.cpp",
"chars": 16295,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (talloc.cpp o"
},
{
"path": "ext/detours/samples/talloc/tdll1x.cpp",
"chars": 525,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (tdll1x.cpp o"
},
{
"path": "ext/detours/samples/talloc/tdll2x.cpp",
"chars": 525,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (tdll2x.cpp o"
},
{
"path": "ext/detours/samples/talloc/tdll3x.cpp",
"chars": 525,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (tdll3x.cpp o"
},
{
"path": "ext/detours/samples/talloc/tdll4x.cpp",
"chars": 525,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (tdll4x.cpp o"
},
{
"path": "ext/detours/samples/talloc/tdll5x.cpp",
"chars": 525,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (tdll5x.cpp o"
},
{
"path": "ext/detours/samples/talloc/tdll6x.cpp",
"chars": 525,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (tdll6x.cpp o"
},
{
"path": "ext/detours/samples/talloc/tdll7x.cpp",
"chars": 525,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (tdll7x.cpp o"
},
{
"path": "ext/detours/samples/talloc/tdll8x.cpp",
"chars": 525,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (tdll8x.cpp o"
},
{
"path": "ext/detours/samples/talloc/tdll9x.cpp",
"chars": 525,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (tdll9x.cpp o"
},
{
"path": "ext/detours/samples/traceapi/Makefile",
"chars": 5555,
"preview": "##############################################################################\n##\n## Utility to trace Win32 APIs.\n##\n##"
},
{
"path": "ext/detours/samples/traceapi/_win32.cpp",
"chars": 1057533,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (_win32.cpp o"
},
{
"path": "ext/detours/samples/traceapi/testapi.cpp",
"chars": 2017,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (testapi.cpp "
},
{
"path": "ext/detours/samples/traceapi/trcapi.cpp",
"chars": 14280,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (trcapi.cpp o"
},
{
"path": "ext/detours/samples/traceapi/trcapi.rc",
"chars": 569,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Version information for trcapi.rc."
},
{
"path": "ext/detours/samples/tracebld/Makefile",
"chars": 4893,
"preview": "##############################################################################\n##\n## Utility to registry and file acces"
},
{
"path": "ext/detours/samples/tracebld/tracebld.cpp",
"chars": 17992,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (tracebld.cpp"
},
{
"path": "ext/detours/samples/tracebld/tracebld.h",
"chars": 1654,
"preview": "//////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (tracebld.h o"
},
{
"path": "ext/detours/samples/tracebld/trcbld.cpp",
"chars": 132100,
"preview": "/////////////////////////////////////////////////////////////////////////////\n//\n// Detours Test Program (trcbld.cpp of"
}
]
// ... and 671 more files (download for full content)
About this extraction
This page contains the full source code of the vosen/ZLUDA GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 871 files (17.8 MB), approximately 4.1M tokens. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.