Repository: JuliaGPU/oneAPI.jl Branch: master Commit: 2140c1fa0f76 Files: 134 Total size: 1.9 MB Directory structure: gitextract_pys1ksq7/ ├── .buildkite/ │ └── pipeline.yml ├── .github/ │ ├── dependabot.yml │ └── workflows/ │ ├── CompatHelper.yml │ ├── DocsCleanup.yml │ ├── Format.yml │ ├── TagBot.yml │ ├── ci.yml │ └── docs.yml ├── .gitignore ├── CITATION.cff ├── LICENSE.md ├── Project.toml ├── README.md ├── codecov.yml ├── deps/ │ ├── .clang-format │ ├── .gitignore │ ├── CMakeLists.txt │ ├── Project.toml │ ├── build_ci.jl │ ├── build_local.jl │ ├── generate_helpers.jl │ ├── generate_interfaces.jl │ ├── onemkl_epilogue.cpp │ ├── onemkl_epilogue.h │ ├── onemkl_prologue.cpp │ ├── onemkl_prologue.h │ └── src/ │ ├── onemkl.cpp │ ├── onemkl.h │ ├── onemkl_dft.cpp │ ├── onemkl_dft.h │ ├── sycl.cpp │ ├── sycl.h │ └── sycl.hpp ├── docs/ │ ├── Project.toml │ ├── make.jl │ └── src/ │ ├── api/ │ │ ├── arrays.md │ │ ├── compiler.md │ │ ├── context.md │ │ ├── kernels.md │ │ └── memory.md │ ├── api.md │ ├── arrays.md │ ├── device.md │ ├── getting_started.md │ ├── index.md │ ├── installation.md │ ├── kernels.md │ ├── level_zero.md │ ├── memory.md │ ├── onemkl.md │ ├── troubleshooting.md │ └── usage/ │ └── performance.md ├── examples/ │ ├── gemm.jl │ └── vadd.jl ├── lib/ │ ├── level-zero/ │ │ ├── barrier.jl │ │ ├── cmdlist.jl │ │ ├── cmdqueue.jl │ │ ├── common.jl │ │ ├── context.jl │ │ ├── copy.jl │ │ ├── device.jl │ │ ├── driver.jl │ │ ├── error.jl │ │ ├── event.jl │ │ ├── fence.jl │ │ ├── libze.jl │ │ ├── libze_aliases.jl │ │ ├── memory.jl │ │ ├── module.jl │ │ ├── oneL0.jl │ │ ├── pointer.jl │ │ ├── residency.jl │ │ └── utils.jl │ ├── mkl/ │ │ ├── array.jl │ │ ├── fft.jl │ │ ├── interfaces.jl │ │ ├── linalg.jl │ │ ├── oneMKL.jl │ │ ├── utils.jl │ │ ├── wrappers_blas.jl │ │ ├── wrappers_lapack.jl │ │ └── wrappers_sparse.jl │ ├── support/ │ │ ├── Support.jl │ │ └── liboneapi_support.jl │ ├── sycl/ │ │ └── SYCL.jl │ └── utils/ │ ├── APIUtils.jl │ └── enum.jl ├── res/ │ ├── 
Project.toml │ ├── libze_prologue.jl │ ├── local.jl │ ├── support.toml │ ├── wrap.jl │ └── ze.toml ├── src/ │ ├── accumulate.jl │ ├── array.jl │ ├── broadcast.jl │ ├── compiler/ │ │ ├── compilation.jl │ │ ├── execution.jl │ │ └── reflection.jl │ ├── context.jl │ ├── device/ │ │ ├── array.jl │ │ ├── atomics.jl │ │ ├── quirks.jl │ │ └── runtime.jl │ ├── gpuarrays.jl │ ├── indexing.jl │ ├── mapreduce.jl │ ├── memory.jl │ ├── oneAPI.jl │ ├── oneAPIKernels.jl │ ├── pool.jl │ ├── random.jl │ ├── sorting.jl │ └── utils.jl └── test/ ├── Project.toml ├── array.jl ├── device/ │ └── intrinsics.jl ├── dummy.bc ├── dummy.ll ├── dummy.spt ├── dummy.spv ├── examples.jl ├── execution.jl ├── fft.jl ├── indexing.jl ├── kernelabstractions.jl ├── level-zero.jl ├── onemkl.jl ├── pointer.jl ├── random.jl ├── runtests.jl ├── setup.jl ├── sorting.jl └── sycl.jl ================================================ FILE CONTENTS ================================================ ================================================ FILE: .buildkite/pipeline.yml ================================================ steps: # Test supported Julia versions - group: ":julia: Julia" key: "julia" steps: - label: "Julia {{matrix.julia}}" plugins: - JuliaCI/julia#v1: version: "{{matrix.julia}}" - JuliaCI/julia-test#v1: test_args: "--quickfail" - JuliaCI/julia-coverage#v1: dirs: - src - lib - examples agents: queue: "juliagpu" intel: "*" commands: | julia --project=deps deps/build_ci.jl if: build.message !~ /\[skip tests\]/ timeout_in_minutes: 120 matrix: setup: julia: - "1.10" - "1.11" - "1.12" - "nightly" adjustments: - with: julia: "nightly" soft_fail: true # Special tests - group: ":eyes: Special" depends_on: "julia" steps: - label: "Validation" plugins: - JuliaCI/julia#v1: version: "1.11" - JuliaCI/julia-test#v1: julia_args: "-g2" - JuliaCI/julia-coverage#v1: codecov: true dirs: - src - lib - examples command: | julia --project=deps deps/build_ci.jl julia --project -e ' # use debug JLLs, for asserts + better 
backtraces using oneAPI oneAPI.set_debug!(true)' if: build.message !~ /\[skip tests\]/ env: ZE_ENABLE_VALIDATION_LAYER: '1' ZE_ENABLE_PARAMETER_VALIDATION: '1' EnableDebugBreak: '0' agents: queue: "juliagpu" intel: "*" if: build.message !~ /\[skip tests\]/ && !build.pull_request.draft timeout_in_minutes: 60 soft_fail: true env: JULIA_PKG_SERVER_REGISTRY_PREFERENCE: "eager" # OK to downloading JLLs from GitHub SECRET_CODECOV_TOKEN: "OYpS8fj3vGhj7iZf9vLAeapyxQNSOEW6mApcSvGboL9AlS+0nfOSFjFrIBNnIU0prxQQy1gR9AwR/JO1m2OFWeRhjYtkQPPhk4xVtSKmv0LLTL0snA8IohUopqfu722i7zLrPcz/A0LFIFsb0ey+oReJs2xnGOshNIJu4FDowUV3wmZvfKWNsSK4cGN+HFQ3387Ow4SsmiUr7oqh0iMBQNqaY8oZ2BY1dFOgPaOegIp70YEFRdJ8DKaLd7WGxFLY9oQEhZZdmx/zx0xo56/NGtDwVYkDPa4qPhJczDBoIn5XvcRiIW0VJ/MaRARxnpenBX5H6gwdcZYUGtjXWIRXBw==;U2FsdGVkX1/bZy1Bp4/dBH5scPpWqLKusXGvSkRGUa+1F7hi4P4Cu5a6GcfNIEvQr+bBj2VlZvqhNW0FAqN3QQ==" ================================================ FILE: .github/dependabot.yml ================================================ # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates version: 2 updates: - package-ecosystem: "github-actions" directory: "/" # Location of package manifests schedule: interval: "monthly" ================================================ FILE: .github/workflows/CompatHelper.yml ================================================ name: CompatHelper on: schedule: - cron: '0 0 * * *' workflow_dispatch: jobs: CompatHelper: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - name: Get Julia compatibility id: julia_compat # NOTE: this requires a Julia compat lower-bound with minor version! 
run : | version=$(grep '^julia = ' Project.toml | grep -o '".*"' | cut -d '"' -f2) echo "::set-output name=version::$version" - uses: julia-actions/setup-julia@v2 with: version: ${{ steps.julia_compat.outputs.version }} - name: Install CompatHelper run: | import Pkg name = "CompatHelper" version = "3" Pkg.add(; name, version) shell: julia --color=yes {0} - name: Run CompatHelper run: | using CompatHelper CompatHelper.main() shell: julia --color=yes {0} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} ================================================ FILE: .github/workflows/DocsCleanup.yml ================================================ name: Doc Preview Cleanup on: pull_request: types: [closed] jobs: doc-preview-cleanup: runs-on: ubuntu-latest steps: - name: Checkout gh-pages branch uses: actions/checkout@v6 with: ref: gh-pages - name: Delete preview and history run: | git config user.name "oneAPI.jl" git config user.email "oneapi@juliagpu.github.io" git rm -rf "previews/PR$PRNUM" git commit -m "delete preview" git branch gh-pages-new $(echo "delete history" | git commit-tree HEAD^{tree}) env: PRNUM: ${{ github.event.number }} - name: Push changes run: | git push --force origin gh-pages-new:gh-pages ================================================ FILE: .github/workflows/Format.yml ================================================ name: 'Format' on: pull_request_target: paths: ['**/*.jl'] types: [opened, synchronize, reopened, ready_for_review] permissions: contents: read actions: write pull-requests: write jobs: runic: runs-on: ubuntu-latest if: github.event.pull_request.draft == false steps: - name: Check out repository uses: actions/checkout@v6 with: ref: ${{github.event.pull_request.head.ref}} repository: ${{github.event.pull_request.head.repo.full_name}} fetch-depth: 0 - name: Add upstream remote run: | git remote add upstream https://github.com/${{ github.repository }} git fetch upstream - name: Setup Julia uses: julia-actions/setup-julia@v2 with: version: '1' 
arch: 'x64' - uses: julia-actions/cache@v2 - name: Install Runic run: | julia --project=@runic -e 'using Pkg; Pkg.add("Runic")' curl -o git-runic https://raw.githubusercontent.com/fredrikekre/Runic.jl/master/bin/git-runic chmod +x git-runic sudo mv git-runic /usr/local/bin - name: Run Runic id: runic run: | set +e MERGE_BASE=$(git merge-base upstream/${{ github.base_ref }} HEAD) || exit 1 DIFF=$(git runic --diff $MERGE_BASE) EXIT_CODE=$? echo "exit_code=$EXIT_CODE" >> $GITHUB_OUTPUT echo "diff<> $GITHUB_OUTPUT echo "$DIFF" >> $GITHUB_OUTPUT echo "EOF" >> $GITHUB_OUTPUT # if Runic failed, bail out [ $EXIT_CODE -eq 2 ] && exit 1 || exit 0 - name: Find comment uses: peter-evans/find-comment@v4 id: find-comment with: issue-number: ${{ github.event.pull_request.number }} comment-author: 'github-actions[bot]' body-includes: '' - name: Comment formatting suggestions if: steps.runic.outputs.exit_code == 1 uses: peter-evans/create-or-update-comment@v5 with: comment-id: ${{ steps.find-comment.outputs.comment-id }} issue-number: ${{ github.event.pull_request.number }} body: | Your PR requires formatting changes to meet the project's style guidelines. Please consider running [Runic](https://github.com/fredrikekre/Runic.jl) (`git runic ${{ github.base_ref }}`) to apply these changes.
Click here to view the suggested changes. ~~~diff ${{ steps.runic.outputs.diff }} ~~~
edit-mode: replace - name: Update stale comment if: steps.runic.outputs.exit_code == 0 && steps.find-comment.outputs.comment-id uses: peter-evans/create-or-update-comment@v5 with: comment-id: ${{ steps.find-comment.outputs.comment-id }} issue-number: ${{ github.event.pull_request.number }} body: | Your PR no longer requires formatting changes. Thank you for your contribution! edit-mode: replace # XXX: if Github ever supports allow-failure (actions/runner#2347) #- name: Propagate exit code # run: | # exit ${{ steps.runic.outputs.exit_code }} ================================================ FILE: .github/workflows/TagBot.yml ================================================ name: TagBot on: issue_comment: types: - created workflow_dispatch: jobs: TagBot: if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' runs-on: ubuntu-latest steps: - uses: JuliaRegistries/TagBot@v1 with: token: ${{ secrets.GITHUB_TOKEN }} ================================================ FILE: .github/workflows/ci.yml ================================================ name: CI on: push: branches: - master tags: '*' pull_request: types: [opened, synchronize, reopened] schedule: - cron: '0 0 * * 0' jobs: self-runner: continue-on-error: true env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} runs-on: [self-hosted, linux, X64] strategy: matrix: os: [ubuntu-latest] julia-version: ['1'] julia-arch: [x64] steps: - uses: actions/checkout@v6 - uses: julia-actions/setup-julia@latest with: version: ${{ matrix.julia-version }} - uses: julia-actions/cache@v2 - uses: julia-actions/julia-buildpkg@latest continue-on-error: true - uses: julia-actions/julia-runtest@latest continue-on-error: true ================================================ FILE: .github/workflows/docs.yml ================================================ name: Documentation on: push: branches: - master tags: '*' pull_request: types: [opened, synchronize, reopened] schedule: - cron: '0 0 * * 0' jobs: docs: name: Build 
documentation env: DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} JULIA_DEBUG: Documenter runs-on: [self-hosted, linux, X64] steps: - uses: actions/checkout@v6 - uses: julia-actions/setup-julia@latest with: version: 'lts' - uses: julia-actions/cache@v2 - uses: julia-actions/julia-buildpkg@latest - run: julia --project=docs/ docs/make.jl ================================================ FILE: .gitignore ================================================ LocalPreferences.toml Manifest.toml deps/onemkl_blas.cpp deps/onemkl_blas.h deps/onemkl_lapack.cpp deps/onemkl_lapack.h deps/onemkl_sparse.cpp deps/onemkl_sparse.h docs/build ================================================ FILE: CITATION.cff ================================================ cff-version: 1.2.0 message: "If you use this software, please cite it as below." authors: - family-names: Besard given-names: Tim orcid: https://orcid.org/0000-0001-7826-8021 copyright: "© 2022 Julia Computing, and other contributors" title: "oneAPI.jl" version: 0.3.0 doi: 10.5281/zenodo.7139359 date-released: 2022-10-03 url: "https://github.com/JuliaGPU/oneAPI.jl" ================================================ FILE: LICENSE.md ================================================ The oneAPI.jl package is licensed under the MIT "Expat" License: > Copyright (c) 2020-present: Julia Computing and other contributors > > All Rights Reserved. 
> > Permission is hereby granted, free of charge, to any person obtaining a copy > of this software and associated documentation files (the "Software"), to deal > in the Software without restriction, including without limitation the rights > to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > copies of the Software, and to permit persons to whom the Software is > furnished to do so, subject to the following conditions: > > The above copyright notice and this permission notice shall be included in all > copies or substantial portions of the Software. > > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE > AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > SOFTWARE. 
> ================================================ FILE: Project.toml ================================================ name = "oneAPI" uuid = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b" authors = ["Tim Besard ", "Alexis Montoison", "Michel Schanen "] version = "2.6.1" [deps] AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c" AcceleratedKernels = "6a4ca0a5-0e36-4168-a932-d9be78d558f1" Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" CEnum = "fa961155-64e5-5f13-b03f-caf6b980ea82" ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04" GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55" GPUToolbox = "096a3bc2-3ced-46d0-87f4-dd12716f4bfc" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" NEO_jll = "700fe977-ac61-5f37-bbc8-c6c4b2b6a9fd" Preferences = "21216c6a-2e73-6563-6e65-726566657250" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SPIRVIntrinsics = "71d1d633-e7e8-4a92-83a1-de8814b09ba8" SPIRV_LLVM_Translator_jll = "4a5d46fc-d8cf-5151-a261-86b458210efb" SPIRV_Tools_jll = "6ac6d60f-d740-5983-97d7-a4482c0689f4" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" oneAPI_Level_Zero_Headers_jll = "f4bc562b-d309-54f8-9efb-476e56f0410d" oneAPI_Level_Zero_Loader_jll = "13eca655-d68d-5b81-8367-6d99d727ab01" oneAPI_Support_jll = "b049733a-a71d-5ed3-8eba-7d323ac00b36" [compat] AbstractFFTs = "1.5.0" AcceleratedKernels = "0.3.1, 0.4" Adapt = "4" CEnum = "0.4, 0.5" ExprTools = "0.1" GPUArrays = "11.2.1" GPUCompiler = "1.6" GPUToolbox = "0.1, 0.2, 0.3, 1" KernelAbstractions = "0.9.39" LLVM = "6, 7, 8, 9" NEO_jll = "=25.44.36015" Preferences = "1" SPIRVIntrinsics = "0.5" SPIRV_LLVM_Translator_jll = "21" SPIRV_Tools_jll = 
"2025.4.0" SpecialFunctions = "1.3, 2" StaticArrays = "1" julia = "1.10" oneAPI_Level_Zero_Loader_jll = "1.25" oneAPI_Support_jll = "0.9.2" [extras] libigc_jll = "94295238-5935-5bd7-bb0f-b00942e9bdd5" ================================================ FILE: README.md ================================================ # oneAPI.jl *Julia support for the oneAPI programming toolkit.* [![][doi-img]][doi-url] [![][buildkite-img]][buildkite-url] [![][codecov-img]][codecov-url] [![][docs-stable-img]][docs-stable-url] [![][docs-dev-img]][docs-dev-url] [doi-img]: https://zenodo.org/badge/252466420.svg [doi-url]: https://zenodo.org/badge/latestdoi/252466420 [buildkite-img]: https://badge.buildkite.com/00fff01fd4d6cdd905e61e2ce7ed0f7203ba227df9b575426c.svg?branch=master [buildkite-url]: https://buildkite.com/julialang/oneapi-dot-jl [codecov-img]: https://codecov.io/gh/JuliaGPU/oneAPI.jl/branch/master/graph/badge.svg [codecov-url]: https://codecov.io/gh/JuliaGPU/oneAPI.jl [docs-stable-img]: https://img.shields.io/badge/docs-stable-blue.svg [docs-stable-url]: https://juliagpu.github.io/oneAPI.jl/stable [docs-dev-img]: https://img.shields.io/badge/docs-dev-blue.svg [docs-dev-url]: https://juliagpu.github.io/oneAPI.jl/dev oneAPI.jl provides support for working with the [oneAPI unified programming model](https://software.intel.com/en-us/oneapi). The package is verified to work with the (currently) only implementation of this interface [that is part of the Intel Compute Runtime](https://github.com/intel/compute-runtime), only available on Linux. Windows support is experimental. ## Status **oneAPI.jl is looking for contributors and/or a maintainer. Reach out if you can help!** The current version of oneAPI.jl supports most of the oneAPI Level Zero interface, has good kernel programming capabilties, and as a demonstration of that it fully implements the GPUArrays.jl array interfaces. This results in a full-featured GPU array type. 
However, the package has not been extensively tested, and performance issues might be present. The integration with vendor libraries like oneMKL has been extended with support for sparse linear algebra operations. Some operations may still be unavailable or slow. ## Quick start You need to use Julia 1.10 or higher, and it is strongly advised to use [the official binaries](https://julialang.org/downloads/). For now, only Linux is supported. On Windows, you need to use the second generation Windows Subsystem for Linux (WSL2). **If you're using Intel Arc GPUs (A580, A750, A770, etc), you need to use at least Linux 6.2.** For other hardware, any recent Linux distribution should work. Once you have installed Julia, proceed by entering the package manager REPL mode by pressing `]` and adding the oneAPI package: ``` pkg> add oneAPI ``` This installation will take a couple of minutes to download necessary binaries, such as the oneAPI loader, several SPIR-V tools, etc. For now, the oneAPI.jl package also depends on [the Intel implementation](https://github.com/intel/compute-runtime) of the oneAPI spec. That means you need compatible hardware; refer to the Intel documentation for more details. Once you have oneAPI.jl installed, perform a smoke test by calling the `versioninfo()` function: ```julia julia> using oneAPI julia> oneAPI.versioninfo() Binary dependencies: - NEO: 25.35.35096 - libigc: 1.0.17193+0 - gmmlib: 22.3.20+0 - SPIRV_LLVM_Translator: 21 - SPIRV_Tools: 2025.4.0 - oneAPI_Support: 0.9.2 (oneMKL v2025.2.0) Toolchain: - Julia: 1.11.5 - LLVM: 16.0.6 1 driver: - 00000000-0000-0000-173d-d94201036013 (v1.3.24595, API v1.3.0) 2 devices: - Intel(R) Graphics [0x56a0] - Intel(R) HD Graphics P630 [0x591d] ``` If you have multiple compatible drivers or devices, use the `driver!` and `device!` functions to configure which one to use in the current task: ```julia julia> devices() ZeDevice iterator for 2 devices: 1. Intel(R) Graphics [0x56a0] 2. 
Intel(R) HD Graphics P630 [0x591d] julia> device() ZeDevice(GPU, vendor 0x8086, device 0x56a0): Intel(R) Graphics [0x56a0] julia> device!(2) ZeDevice(GPU, vendor 0x8086, device 0x591d): Intel(R) HD Graphics P630 [0x591d] ``` To ensure other functionality works as expected, you can run the test suite from the package manager REPL mode. Note that this will pull and run the test suite for [GPUArrays](https://github.com/JuliaGPU/GPUArrays.jl), which takes quite some time: ``` pkg> test oneAPI ... Testing finished in 16 minutes, 27 seconds, 506 milliseconds Test Summary: | Pass Total Time Overall | 4945 4945 SUCCESS Testing oneAPI tests passed ``` ## Usage The functionality of oneAPI.jl is organized as follows: - low-level wrappers for the Level Zero library - kernel programming capabilities - abstractions for high-level array programming The level zero wrappers are available in the `oneL0` submodule, and expose all flexibility of the underlying APIs with user-friendly wrappers: ```julia julia> using oneAPI, oneAPI.oneL0 julia> drv = first(drivers()); julia> ctx = ZeContext(drv); julia> dev = first(devices(drv)) ZeDevice(GPU, vendor 0x8086, device 0x1912): Intel(R) Gen9 julia> compute_properties(dev) (maxTotalGroupSize = 256, maxGroupSizeX = 256, maxGroupSizeY = 256, maxGroupSizeZ = 256, maxGroupCountX = 4294967295, maxGroupCountY = 4294967295, maxGroupCountZ = 4294967295, maxSharedLocalMemory = 65536, subGroupSizes = (8, 16, 32)) julia> queue = ZeCommandQueue(ctx, dev); julia> execute!(queue) do list append_barrier!(list) end ``` Built on top of that, are kernel programming capabilities for executing Julia code on oneAPI accelerators. 
For now, we reuse OpenCL intrinsics, and compile to SPIR-V using [Khronos' translator](https://github.com/KhronosGroup/SPIRV-LLVM-Translator): ```julia julia> function kernel() barrier(0) return end julia> @oneapi items=1 kernel() ``` Code reflection macros are available to see the generated code: ```julia julia> @device_code_llvm @oneapi items=1 kernel() ``` ```llvm ; @ REPL[18]:1 within `kernel' define dso_local spir_kernel void @_Z17julia_kernel_3053() local_unnamed_addr { top: ; @ REPL[18]:2 within `kernel' ; ┌ @ oneAPI.jl/src/device/opencl/synchronization.jl:9 within `barrier' @ oneAPI.jl/src/device/opencl/synchronization.jl:9 ; │┌ @ oneAPI.jl/src/device/opencl/utils.jl:34 within `macro expansion' call void @_Z7barrierj(i32 0) ; └└ ; @ REPL[18]:3 within `kernel' ret void } ``` ```julia julia> @device_code_spirv @oneapi items=1 kernel() ``` ```spirv ; SPIR-V ; Version: 1.0 ; Generator: Khronos LLVM/SPIR-V Translator; 14 ; Bound: 9 ; Schema: 0 OpCapability Addresses OpCapability Kernel %1 = OpExtInstImport "OpenCL.std" OpMemoryModel Physical64 OpenCL OpEntryPoint Kernel %4 "_Z17julia_kernel_3067" OpSource OpenCL_C 200000 OpName %top "top" %uint = OpTypeInt 32 0 %uint_2 = OpConstant %uint 2 %uint_0 = OpConstant %uint 0 %void = OpTypeVoid %3 = OpTypeFunction %void %4 = OpFunction %void None %3 %top = OpLabel OpControlBarrier %uint_2 %uint_2 %uint_0 OpReturn OpFunctionEnd ``` Finally, the `oneArray` type makes it possible to use your oneAPI accelerator without the need to write custom kernels, thanks to Julia's high-level array abstractions: ```julia julia> a = oneArray(rand(Float32, 2,2)) 2×2 oneArray{Float32,2}: 0.592979 0.996154 0.874364 0.232854 julia> a .+ 1 2×2 oneArray{Float32,2}: 1.59298 1.99615 1.87436 1.23285 ``` The oneMKL integration provides extended support for linear algebra operations, including sparse matrix operations that integrate with Julia's standard LinearAlgebra interface: ```julia julia> using oneAPI, oneAPI.oneMKL, SparseArrays, 
LinearAlgebra julia> A = sprand(100, 100, 0.1) julia> dA = oneMKL.oneSparseMatrixCSC(A) julia> x = oneArray(rand(100)) julia> y = dA * x # Matrix-vector multiplication via LinearAlgebra ``` ### `Float64` support Not all oneAPI GPUs support Float64 datatypes. You can test if your GPU does using the following code: ```julia julia> using oneAPI julia> oneL0.module_properties(device()).fp64flags & oneL0.ZE_DEVICE_MODULE_FLAG_FP64 == oneL0.ZE_DEVICE_MODULE_FLAG_FP64 false ``` If your GPU doesn't, executing code that relies on Float64 values will result in an error: ```julia julia> oneArray([1.]) .+ 1 ┌ Error: Module compilation failed: │ │ error: Double type is not supported on this platform. ``` ## Development To work on oneAPI.jl, you just need to `dev` the package. In addition, you may need to **build the binary support library** that's used to interface with oneMKL and other C++ vendor libraries. This library is normally provided by the oneAPI_Support_jll.jl package, however, we only guarantee to update this package when releasing oneAPI.jl. You can build this library yourself by simply executing `deps/build_local.jl`. To facilitate development, there are other things you may want to configure: ### Enabling the oneAPI validation layer The oneAPI Level Zero libraries feature a so-called validation layer, which validates the arguments to API calls. This can be useful to spot potential isssues, and can be enabled by setting the following environment variables: - `ZE_ENABLE_VALIDATION_LAYER=1` - `ZE_ENABLE_PARAMETER_VALIDATION=1` - `EnableDebugBreak=0` (this is needed to work around intel/compute-runtime#639) ### Using a debug toolchain If you're experiencing an issue with the underlying toolchain (NEO, IGC, etc), you may want to use a debug build of these components, which also perform additional validation. This can be done simply by calling `oneAPI.set_debug!(true)` and restarting your Julia session. This sets a preference used by the respective JLL packages. 
### Using a local toolchain To further debug the toolchain, you may need a custom build and point oneAPI.jl towards it. This can also be done using preferences, overriding the paths to resources provided by the various JLLs that oneAPI.jl uses. A helpful script to automate this is provided in the `res` folder of this repository: ``` $ julia res/local.jl Trying to find local IGC... - found libigc at /usr/local/lib/libigc.so - found libiga64 at /usr/local/lib/libiga64.so - found libigdfcl at /usr/local/lib/libigdfcl.so - found libopencl-clang at /usr/local/lib/libopencl-clang.so.11 Trying to find local gmmlib... - found libigdgmm at /usr/local/lib/libigdgmm.so Trying to find local NEO... - found libze_intel_gpu.so.1 at /usr/local/lib/libze_intel_gpu.so.1 - found libigdrcl at /usr/local/lib/intel-opencl/libigdrcl.so Trying to find local oneAPI loader... - found libze_loader at /lib/x86_64-linux-gnu/libze_loader.so - found libze_validation_layer at /lib/x86_64-linux-gnu/libze_validation_layer.so Writing preferences... ``` The discovered paths will be written to a global file with preferences, typically `$HOME/.julia/environments/vX.Y/LocalPreferences.toml` (where `vX.Y` refers to the Julia version you are using). You can modify this file, or remove it when you want to revert to default set of binaries. ================================================ FILE: codecov.yml ================================================ coverage: ignore: - "lib/*/lib*.jl" - "src/device" - "res/" status: patch: false project: false changes: false ================================================ FILE: deps/.clang-format ================================================ --- IndentWidth: '4' MaxEmptyLinesToKeep: '2' ... 
================================================ FILE: deps/.gitignore ================================================ liboneapilib.so Manifest.toml ================================================ FILE: deps/CMakeLists.txt ================================================ cmake_minimum_required(VERSION 3.13) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) project(oneAPISupport) add_library(oneapi_support SHARED src/sycl.h src/sycl.hpp src/sycl.cpp src/onemkl.h src/onemkl.cpp src/onemkl_dft.h src/onemkl_dft.cpp ) target_link_libraries(oneapi_support mkl_sycl # DFT component libraries needed for oneMKL DFT template instantiations mkl_sycl_dft mkl_cdft_core mkl_intel_ilp64 mkl_sequential mkl_core sycl OpenCL # XXX: we don't want to link against this plugin, but otherwise the run-time # loader doesn't find it (since it's located in the non-global Conda # library directory, and we can't set LD_LIBRARY_PATH from within Julia). ur_adapter_level_zero ) install(TARGETS oneapi_support LIBRARY DESTINATION lib) ================================================ FILE: deps/Project.toml ================================================ [deps] CMake_jll = "3f4e10e2-61f2-5801-8945-23b9d642d0e6" Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" Git = "d7ba0133-e1db-5d97-8f8c-041e4b3a1eb2" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Ninja_jll = "76642167-d241-5cee-8c94-7a494e8cb7b7" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Preferences = "21216c6a-2e73-6563-6e65-726566657250" Scratch = "6c6a2e73-6563-6170-7368-637461726353" oneAPI_Level_Zero_Headers_jll = "f4bc562b-d309-54f8-9efb-476e56f0410d" oneAPI_Support_Headers_jll = "24f86df5-245d-5634-a4cc-32433d9800b3" [compat] oneAPI_Support_Headers_jll = "=2025.2.0" ================================================ FILE: deps/build_ci.jl ================================================ using Pkg Pkg.activate(@__DIR__) Pkg.instantiate() using Git, Scratch, Dates oneAPI = 
Base.UUID("8f75cd03-7ff8-4ecb-9b8f-daf728133b1b") # get scratch directories support_dir = get_scratch!(oneAPI, "support") # is this a full-fledged check-out? if isdir(joinpath(@__DIR__), "..", ".git") # determine latest change to the wrappers deps_timestamp = parse(Int, read(`$(git()) -C $(@__DIR__) log -1 --format=%ct src`, String)) @info "Latest change to the wrappers: $(unix2datetime(deps_timestamp))" # find out which version of oneAPI_Support_jll we are using Pkg.activate(joinpath(@__DIR__, "..")) Pkg.instantiate() deps = collect(values(Pkg.dependencies())) filter!(deps) do dep dep.name == "oneAPI_Support_jll" end library_version = only(deps).version @info "oneAPI_Support_jll version: $(library_version)" # compare to the JLL's tags jll_tags = mktempdir() do dir if !isdir(joinpath(support_dir, ".git")) run(`$(git()) clone -q https://github.com/JuliaBinaryWrappers/oneAPI_Support_jll.jl $dir`) else run(`$(git()) -C $dir fetch -q`) end tags = Dict{String,Int}() for line in eachline(`$(git()) -C $dir tag --format "%(refname:short) %(creatordate:unix)"`) tag, timestamp = split(line) tags[tag] = parse(Int, timestamp) end tags end jll_timestamp = jll_tags["oneAPI_Support-v$(library_version)"] @info "oneAPI_Support_jll timestamp: $(unix2datetime(jll_timestamp))" if deps_timestamp > jll_timestamp @info "Wrappers have changed since the last JLL build. Building the support library locally." include(joinpath(@__DIR__, "build_local.jl")) else @info "Wrappers have not changed since the last JLL build. Using the JLL's support library." end else @warn """oneAPI.jl source code is not checked-out from Git. 
This means we cannot check for changes, and need to unconditionally build the support library.""" include(joinpath(@__DIR__, "build_local.jl")) end ================================================ FILE: deps/build_local.jl ================================================ # build liboneapi_support with C wrappers for C++ APIs using Pkg Pkg.activate(@__DIR__) Pkg.instantiate() if haskey(ENV, "BUILDKITE") run(`buildkite-agent annotate 'Using a locally-built support library; A bump of oneAPI_Support_jll is required before releasing this packages.' --style 'warning' --context 'ctx-deps'`) end using Scratch, Preferences, CMake_jll, Ninja_jll, oneAPI_Level_Zero_Headers_jll oneAPI = Base.UUID("8f75cd03-7ff8-4ecb-9b8f-daf728133b1b") # get scratch directories conda_dir = get_scratch!(oneAPI, "conda") install_dir = get_scratch!(oneAPI, "deps") rm(install_dir; recursive=true) # get build directory build_dir = if isempty(ARGS) mktempdir() else ARGS[1] end mkpath(build_dir) # install the toolchain try using Conda catch err # Sometimes, Conda fails to import because its environment is missing. # That's probably caused by a missing build, but Pkg should do that... Pkg.build("Conda") using Conda end if !isdir(Conda.ROOTENV) # Same as above Pkg.build("Conda") end if !isfile(joinpath(conda_dir, "condarc-julia.yml")) Conda.create(conda_dir) # conda#8850 mkpath(joinpath(conda_dir, "conda-meta")) touch(joinpath(conda_dir, "conda-meta", "history")) end Conda.add_channel("https://software.repos.intel.com/python/conda/", conda_dir) Conda.add(["dpcpp_linux-64=2025.2.0", "mkl-devel-dpcpp=2025.2.0"], conda_dir) Conda.list(conda_dir) # XXX: isn't there a Conda package providing ze_api.hpp? 
include_dir = joinpath(oneAPI_Level_Zero_Headers_jll.artifact_dir, "include")

# build and install
# Put the Conda-provided toolchain (icpx, MKL) on PATH/LD_LIBRARY_PATH so CMake
# can locate it, then configure with Ninja and install into the scratch prefix.
withenv("PATH"=>"$(ENV["PATH"]):$(Conda.bin_dir(conda_dir))", "LD_LIBRARY_PATH"=>Conda.lib_dir(conda_dir)) do
    cmake() do cmake_path
        ninja() do ninja_path
            # RPATH points at the Conda lib dir so the installed library finds MKL at load time
            run(```$cmake_path -DCMAKE_CXX_COMPILER="icpx" -DCMAKE_CXX_FLAGS="-fsycl -isystem $(conda_dir)/include -isystem $include_dir -fdiagnostics-color=always" -DCMAKE_INSTALL_RPATH=$(Conda.lib_dir(conda_dir)) -DCMAKE_INSTALL_PREFIX=$install_dir -GNinja -S $(@__DIR__) -B $build_dir```)
            run(`$cmake_path --build $(build_dir) --target install`)
        end
    end
end

# TODO: adapt when we support more platforms
lib_path = joinpath(install_dir, "lib", "liboneapi_support.so")
@assert ispath(lib_path)

# tell oneAPI_Support_jll to load our library instead of the default artifact one
set_preferences!(
    joinpath(dirname(@__DIR__), "LocalPreferences.toml"),
    "oneAPI_Support_jll",
    "liboneapi_support_path" => lib_path;
    force=true,
)

# copy the preferences to `test/` as well to work around Pkg.jl#2500
cp(joinpath(dirname(@__DIR__), "LocalPreferences.toml"),
   joinpath(dirname(@__DIR__), "test", "LocalPreferences.toml"); force=true)

================================================
FILE: deps/generate_helpers.jl
================================================

# Sparse routines that are not parameterized on an element type; they are
# handled without the template-analysis below.
non_parametric_routines = ["init_matrix_handle", "release_matrix_handle", "set_matrix_property",
                           "init_matmat_descr", "release_matmat_descr", "set_matmat_data",
                           "get_matmat_data", "matmat", "omatcopy", "sort_matrix",
                           "optimize_gemv", "optimize_gemm", "optimize_trmv", "optimize_trsv",
                           "optimize_trsm", "init_omatconvert_descr", "release_omatconvert_descr",
                           "init_omatadd_descr", "release_omatadd_descr",
                           "omatconvert_buffer_size", "omatconvert_analyze", "omatconvert_get_nnz",
                           "omatconvert", "omatadd_buffer_size", "omatadd_analyze",
                           "omatadd_get_nnz"]

# Scan `cpp_headers` for the ONEMKL_DECLARE_* macro invocations of `name_routine`
# and derive its template parameters and concrete instantiations.
# NOTE(review): only the opening of this function is visible/intact here; the
# remainder appears garbled by the extraction (angle-bracket text stripped).
function analyzer_template(library::String, cpp_headers::String, name_routine::String)
    list_parameters = Vector{String}[]
    list_types = Vector{String}[]
list_versions = String[] list_suffix = String[] if (library == "blas") || (library == "sparse" && !(name_routine ∈ non_parametric_routines)) prefix = (library == "sparse") ? "SPARSE_" : "BUF_" occursin("ONEMKL_DECLARE_$(prefix)$(uppercase(name_routine))(T)", cpp_headers) && (list_parameters = ["T"]) occursin("ONEMKL_DECLARE_$(prefix)$(uppercase(name_routine))(FpType)", cpp_headers) && (list_parameters = ["FpType"]) occursin("ONEMKL_DECLARE_$(prefix)$(uppercase(name_routine))(Tf, Ti)", cpp_headers) && (list_parameters = ["Tf", "Ti"]) occursin("ONEMKL_DECLARE_$(prefix)$(uppercase(name_routine))(T, Ts)", cpp_headers) && (list_parameters = ["T", "Ts"]) occursin("ONEMKL_DECLARE_$(prefix)$(uppercase(name_routine))(IntType, FpType)", cpp_headers) && (list_parameters = ["IntType", "FpType"]) occursin("ONEMKL_DECLARE_$(prefix)$(uppercase(name_routine))(Ta, Tb, Tc, Ts)", cpp_headers) && (list_parameters = ["Ta", "Tb", "Tc", "Ts"]) occursin("ONEMKL_DECLARE_$(prefix)$(uppercase(name_routine))(T, Tres)", cpp_headers) && (list_parameters = ["T", "Tres"]) occursin("ONEMKL_DECLARE_$(prefix)$(uppercase(name_routine))(T, Treal)", cpp_headers) && (list_parameters = ["T", "Treal"]) occursin("ONEMKL_DECLARE_$(prefix)$(uppercase(name_routine))(T, Tc, Ts)", cpp_headers) && (list_parameters = ["T", "Tc", "Ts"]) occursin("ONEMKL_DECLARE_$(prefix)$(uppercase(name_routine))(T, Tc)", cpp_headers) && (list_parameters = ["T", "Tc"]) (list_parameters == []) && @warn("Unable to determine the parametric parameters of $(name_routine).") for (type, version, suffix) in [(["sycl::half"], "H", ""), (["float"], "S", ""), (["double"], "D", ""), (["std::complex"], "C", ""), (["std::complex"], "Z", "")] if occursin("ONEMKL_DECLARE_$(prefix)$(uppercase(name_routine))($(type[1]))", cpp_headers) push!(list_types, type) push!(list_versions, version) push!(list_suffix, suffix) end end for (type, version, suffix) in [(["int32_t","float"], "S", ""), (["int64_t","float"], "S", "_64"), (["int32_t","double"], "D", 
""), (["int64_t","double"], "D", "_64"), (["int32_t","std::complex"], "C", ""), (["int64_t","std::complex"], "C", "_64"), (["int32_t","std::complex"], "Z", ""), (["int64_t","std::complex"], "Z", "_64"), (["float","int32_t"], "S", ""), (["float","int64_t"], "S", "_64"), (["double","int32_t"], "D", ""), (["double","int64_t"], "D", "_64"), (["std::complex","int32_t"], "C", ""), (["std::complex","int64_t"], "C", "_64"), (["std::complex","int32_t"], "Z", ""), (["std::complex","int64_t"], "Z", "_64"), (["sycl::half","sycl::half"], "H", ""), (["float","float"], "S", ""), (["double","double"], "D", ""), (["std::complex","float"], "CS", ""), (["std::complex","double"], "ZD", ""), (["std::complex","std::complex"], "C", ""), (["std::complex","std::complex"], "Z", "")] if occursin("ONEMKL_DECLARE_$(prefix)$(uppercase(name_routine))($(type[1]), $(type[2]))", cpp_headers) push!(list_types, type) push!(list_versions, version) push!(list_suffix, suffix) end end for (type, version, suffix) in [(["sycl::half","sycl::half","sycl::half"], "H", ""), (["float","float","float"], "S", ""), (["double","double","double"], "D", ""), (["std::complex","float","float"], "CS", ""), (["std::complex","float", "std::complex"], "C", ""), (["std::complex","double","double"], "ZD", ""), (["std::complex","double","std::complex"], "Z", "")] if occursin("ONEMKL_DECLARE_$(prefix)$(uppercase(name_routine))($(type[1]), $(type[2]), $(type[3]))", cpp_headers) push!(list_types, type) push!(list_versions, version) push!(list_suffix, suffix) end end for (type, version, suffix) in [(["sycl::half","sycl::half","sycl::half","sycl::half"], "H", ""), (["float","float","float","float"], "S", ""), (["double","double","double","double"], "D", ""), (["std::complex","std::complex","std::complex","std::complex"], "C", ""), (["std::complex","std::complex","std::complex","std::complex"], "Z", "")] if occursin("ONEMKL_DECLARE_$(prefix)$(uppercase(name_routine))($(type[1]), $(type[2]), $(type[3]), $(type[4]))", cpp_headers) 
push!(list_types, type) push!(list_versions, version) push!(list_suffix, suffix) end end end return list_parameters, list_types, list_versions, list_suffix end ================================================ FILE: deps/generate_interfaces.jl ================================================ using oneAPI_Support_Headers_jll include("generate_helpers.jl") include_dir = joinpath(oneAPI_Support_Headers_jll.artifact_dir, "include") blas = [joinpath(include_dir, "oneapi", "mkl", "blas", "buffer_decls.hpp")] lapack = [joinpath(include_dir, "oneapi", "mkl", "lapack", "lapack.hpp"), joinpath(include_dir, "oneapi", "mkl", "lapack", "scratchpad.hpp")] sparse = [joinpath(include_dir, "oneapi", "mkl", "spblas", "sparse_structures.hpp"), joinpath(include_dir, "oneapi", "mkl", "spblas", "sparse_auxiliary.hpp"), joinpath(include_dir, "oneapi", "mkl", "spblas", "sparse_operations.hpp")] dict_version = Dict{Int, Char}(1 => 'S', 2 => 'D', 3 => 'C', 4 => 'Z') version_types = Dict{Char, String}('S' => "float", 'D' => "double", 'C' => "std::complex", 'Z' => "std::complex") version_types_header = Dict{Char, String}('S' => "float", 'D' => "double", 'C' => "float _Complex", 'Z' => "double _Complex") comments = ["namespace", "#", "}", "/*", "*", "//", "[[", "ONEMKL_DECLARE_", "ONEMKL_INLINE_DECLARE"] void_output = ["init_matrix_handle", "init_matmat_descr", "release_matmat_descr", "set_matmat_data", "get_matmat_data", "init_omatadd_descr", "init_omatconvert_descr"] function generate_headers(library::String, filename::Vector{String}, output::String; pattern::String="") routines = Dict{String,Int}() signatures = [] signatures2 = [] cpp_headers = "" for file in filename cpp_headers = cpp_headers * read(file, String) end cpp_headers = replace(cpp_headers, "std::int32_t" => "int32_t") cpp_headers = replace(cpp_headers, "std::int64_t" => "int64_t") cpp_headers = replace(cpp_headers, "; \\" => ";") cpp_headers = replace(cpp_headers, ")\n\n" => ");\n\n") cpp_headers = replace(cpp_headers, "\\\n" => 
"\n") cpp_headers = replace(cpp_headers, "sycl::event\n" => "sycl::event ") headers = "" # Remove comments for header in split(cpp_headers, '\n') mapreduce(x -> !startswith(strip(header), x) && !occursin("\"", header), &, comments) && (headers *= header) end # Analyse each header headers = split(headers, ';') for (i, header) in enumerate(headers) # We only generate C interfaces for exported symbols !occursin("DLL_EXPORT", header) && !occursin("_scratchpad_size", header) && continue # We don't want to interface routines with the following types, parameters or names occursin("class", header) && continue occursin("span", header) && continue occursin("bfloat16", header) && continue occursin("::int8_t", header) && continue (library == "lapack") && occursin("void", header) && continue # We only want USM routines (library == "sparse") && occursin("trsv", header) && !occursin("optimize_trsv", header) && !occursin("alpha", header) && continue # SPARSE routine occursin("(matrix_handle_t SpMat", header) && continue # SPARSE routine occursin("set_csr_data(matrix_handle_t", header) && continue # SPARSE routine occursin("release_matrix_handle(matrix_handle_t", header) && continue # SPARSE routine occursin("get_matmat_data", header) && continue # SPARSE routine occursin("matmat(", header) && continue # SPARSE routine bool = occursin("release", header) || occursin("init", header) (library == "sparse") && occursin("omatconvert", header) && !bool && continue # SPARSE routine (library == "sparse") && occursin("omatadd", header) && !bool && continue # SPARSE routine occursin("gemm_bias", header) && continue # BLAS routine occursin("getri_batch", header) && occursin("ldainv", header) && continue # LAPACK routine # Check if the routine is a template template = occursin("template", header) if template header = replace(header, "template = nullptr> " => "") header = replace(header, "template = nullptr> " => "") header = replace(header, "template = nullptr> " => "") header = replace(header, 
"template = nullptr>" => "") header = replace(header, "template = nullptr>" => "") header = replace(header, "template = nullptr>" => "") header = replace(header, "template = nullptr>" => "") header = replace(header, "template = nullptr>" => "") header = replace(header, "template = nullptr>" => "") end type_routine = "" if occursin("_scratchpad_size", header) type_routine = "scratchpad_size" elseif occursin("sycl::event", header) header = replace(header, "const std::vector &events = {}" => "") header = replace(header, "const std::vector &events = {}" => "") header = replace(header, "const std::vector &event_list = {}" => "") header = replace(header, "std::vector &dependencies = {}" => "") header = replace(header, "std::vector &dependencies" => "") # typo in "onemkl_sparse.cpp" type_routine = "usm" else type_routine = "buffer" end # Add a space for the returned argument header = replace(header, "sycl::event" => "sycl::event ") header = replace(header, "void" => "void ") # Replace the types header = replace(header, "sycl::queue &queue" => "syclQueue_t device_queue") header = replace(header, "sycl::queue& queue" => "syclQueue_t device_queue") if library ∈ ("blas", "sparse") header = replace(header, "compute_mode mode = MKL_BLAS_COMPUTE_MODE" => "") header = replace(header, "index_base base=index_base::zero" => "onemklIndex base") header = replace(header, "sycl::buffer &" => "Ta *") header = replace(header, "sycl::buffer &" => "Tb *") header = replace(header, "sycl::buffer &" => "Tc *") header = replace(header, "sycl::buffer &" => "Td *") header = replace(header, "sycl::buffer &" => "Treal *") header = replace(header, "sycl::buffer &" => "Tres *") header = replace(header, "sycl::buffer &" => "T *") header = replace(header, "sycl::buffer &" => "Ta *") header = replace(header, "sycl::buffer &" => "Tb *") header = replace(header, "sycl::buffer &" => "Tc *") header = replace(header, "sycl::buffer &" => "Td *") header = replace(header, "sycl::buffer &" => "Ti *") header = 
replace(header, "sycl::buffer &" => "Tf *") header = replace(header, "sycl::buffer &" => "Treal *") header = replace(header, "sycl::buffer &" => "Tres *") header = replace(header, "sycl::buffer &" => "T *") header = replace(header, "sycl::buffer &" => "T *") header = replace(header, "sycl::buffer &" => "FpType *") header = replace(header, "sycl::buffer &" => "IntType *") end header = replace(header, "sycl::buffer &" => "float *") header = replace(header, "sycl::buffer &" => "float *") header = replace(header, "sycl::buffer &" => "double *") header = replace(header, "sycl::buffer> &" => "float _Complex *") header = replace(header, "sycl::buffer> &" => "float _Complex *") header = replace(header, "sycl::buffer> &" => "double _Complex *") header = replace(header, "sycl::buffer &" => "int32_t *") header = replace(header, "sycl::buffer &" => "int64_t *") header = replace(header, "sycl::buffer &" => "float *") header = replace(header, "sycl::buffer &" => "double *") header = replace(header, "sycl::buffer, 1> &" => "float _Complex *") header = replace(header, "sycl::buffer, 1> &" => "double _Complex *") header = replace(header, "sycl::buffer *" => "uint8_t *") header = replace(header, "sycl::buffer &" => "int32_t *") header = replace(header, "sycl::buffer &" => "int64_t *") header = replace(header, "sycl::buffer *" => "int64_t *") header = replace(header, "std::complex *" => "float _Complex *") header = replace(header, "std::complex *" => "float _Complex *") header = replace(header, "std::complex *" => "double _Complex *") header = replace(header, "template <>\n" => "") header = replace(header, ">" => "") header = replace(header, ">" => "") header = replace(header, "" => "") header = replace(header, "" => "") header = replace(header, "oneapi::mkl::transpose" => "onemklTranspose") header = replace(header, "oneapi::mkl::uplo" => "onemklUplo") header = replace(header, "oneapi::mkl::diag" => "onemklDiag") header = replace(header, "oneapi::mkl::side" => "onemklSide") header = 
replace(header, "oneapi::mkl::offset" => "onemklOffset") header = replace(header, "oneapi::mkl::job" => "onemklJob") header = replace(header, "oneapi::mkl::generate" => "onemklGenerate") header = replace(header, "oneapi::mkl::compz" => "onemklCompz") header = replace(header, "oneapi::mkl::direct" => "onemklDirect") header = replace(header, "oneapi::mkl::storev" => "onemklStorev") header = replace(header, "oneapi::mkl::rangev" => "onemklRangev") header = replace(header, "oneapi::mkl::order" => "onemklOrder") header = replace(header, "oneapi::mkl::jobsvd" => "onemklJobsvd") header = replace(header, "oneapi::mkl::layout" => "onemklLayout") header = replace(header, "oneapi::mkl::index" => "onemklIndex") header = replace(header, "oneapi::mkl::property" => "onemklProperty") header = replace(header, "sparse::matmat_descr_t" => "matmat_descr_t") # Sanitize the header header = replace(header, " \\" => "") header = replace(header, "\n" => "") header = replace(header, "DLL_EXPORT " => "") header = replace(header, "const " => "") for i = 1:20 header = replace(header, " " => " ") end header = replace(header, "( " => "(") header = replace(header, ", )" => ")") header = replace(header, ",)" => ")") header = replace(header, " void" => "void") header = replace(header, " sycl::event" => "sycl::event") header = replace(header, "* const* " => "**") header = replace(header, "int64_t**" => "int64_t **") ind1 = findfirst(' ', header) ind2 = findfirst('(', header) name_routine = header[ind1+1:ind2-1] !haskey(routines, name_routine * type_routine) && (routines[name_routine * type_routine] = 0) (name_routine == "gesvd_scratchpad_size") && (routines[name_routine * type_routine] > 1) && continue routines[name_routine * type_routine] += 1 # They use template for BLAS and SPARSE routines list_parameters, list_types, list_versions, list_suffix = analyzer_template(library, cpp_headers, name_routine) !isempty(list_parameters) && (type_routine == "buffer") && (library == "sparse") && continue # 
Only wrap the USM version of sparse routines version = 'X' version = occursin("double", header) ? 'D' : version version = occursin("float", header) ? 'S' : version version = occursin("float _Complex", header) ? 'C' : version version = occursin("double _Complex", header) ? 'Z' : version version = occursin("_scratchpad_size", header) ? 'W' : version if version == 'W' # The version 'W' is used for routines with suffix "_scratchpad_size" versions = ('S', 'D', 'C', 'Z') mapreduce(x -> startswith(name_routine, x), |, ["or", "sy"]) && !startswith(name_routine, "sytrf") && (versions = ('S', 'D')) mapreduce(x -> startswith(name_routine, x), |, ["un", "he"]) && (versions = ('C', 'Z')) routines[name_routine * type_routine] = routines[name_routine * type_routine] - 1 + length(versions) for blas_version in versions copy_header = header copy_header = replace(copy_header, "typename fp_type::value_type" => version_types_header[blas_version]) copy_header = replace(copy_header, "fp_type" => version_types_header[blas_version]) copy_header = replace(copy_header, "fp" => version_types_header[blas_version]) copy_header = replace(copy_header, name_routine => "onemkl$(blas_version)$(name_routine)") if name_routine ∈ ("heevx_scratchpad_size", "hegvx_scratchpad_size") copy_header = replace(copy_header, "typename float _Complex::value_type" => "float") copy_header = replace(copy_header, "typename double _Complex::value_type" => "double") end if occursin("batch", name_routine) && !occursin("*", header) copy_header = replace(copy_header, "_batch" => "_batch_strided") end push!(signatures, (copy_header, name_routine, blas_version, type_routine, template)) end else if isempty(list_versions) # The routine "optimize_trsm" has two versions. 
suffix = "" (name_routine == "optimize_trsm") && occursin("columns", header) && (suffix = "_advanced") (name_routine == "optimize_gemm") && occursin("columns", header) && (suffix = "_advanced") name_routine ∈ ("set_csr_data", "set_coo_data") && occursin("int64_t", header) && (suffix = "_64") occursin("batch", name_routine) && !occursin("**", header) && (suffix = "_strided") header = replace(header, "$(name_routine)(" => "onemkl$(version)$(name_routine)$(suffix)(") header = replace(header, "void onemkl" => "int onemkl") header = replace(header, "sycl::event onemkl" => "int onemkl") if library == "sparse" if occursin("std::complex", header) (version == 'C') && (header = replace(header, "std::complex " => "float _Complex ")) (version == 'Z') && (header = replace(header, "std::complex " => "double _Complex ")) end header = replace(header, "transpose " => "onemklTranspose ") header = replace(header, "uplo " => "onemklUplo ") header = replace(header, "diag " => "onemklDiag ") header = replace(header, "side " => "onemklSide ") header = replace(header, "layout " => "onemklLayout ") header = replace(header, "index_base " => "onemklIndex ") header = replace(header, "property " => "onemklProperty ") header = replace(header, "sparse::matrix_view_descr " => "onemklMatrixView ") header = replace(header, "matrix_view_descr " => "onemklMatrixView ") header = replace(header, "sparse::matmat_request " => "onemklMatmatRequest ") header = replace(header, "omatconvert_alg " => "onemklOmatconvertAlg ") header = replace(header, "omatadd_alg " => "onemklOmataddAlg ") header = replace(header, name_routine => "sparse_" * name_routine) end push!(signatures, (header, name_routine, version, type_routine, template)) else n = length(list_parameters) for (i, type) in enumerate(list_types) version = list_versions[i] suffix = list_suffix[i] version = (name_routine ∈ ("her", "herk", "her2k", "rotg", "nrm2", "asum", "hpr")) && (version == "CS") ? 
"C" : version version = (name_routine ∈ ("her", "herk", "her2k", "rotg", "nrm2", "asum", "hpr")) && (version == "ZD") ? "Z" : version copy_header = header for (j, parameter) in enumerate(reverse(list_parameters)) k = n-j+1 copy_header = replace(copy_header, parameter => type[k]) end copy_header = replace(copy_header, "transpose " => "onemklTranspose ") copy_header = replace(copy_header, "uplo " => "onemklUplo ") copy_header = replace(copy_header, "diag " => "onemklDiag ") copy_header = replace(copy_header, "side " => "onemklSide ") copy_header = replace(copy_header, "layout " => "onemklLayout ") copy_header = replace(copy_header, "index_base " => "onemklIndex ") copy_header = replace(copy_header, "std::complex" => "float _Complex") copy_header = replace(copy_header, "std::complex" => "double _Complex") copy_header = replace(copy_header, "sycl::half" => "short") copy_header = replace(copy_header, name_routine => "onemkl$(version)$(name_routine)$(suffix)") copy_header = replace(copy_header, "sycl::event onemkl" => "int onemkl") copy_header = replace(copy_header, "void onemkl" => "int onemkl") if library == "sparse" copy_header = replace(copy_header, name_routine => "sparse_" * name_routine) end if occursin("batch", name_routine) && !occursin("**", header) copy_header = replace(copy_header, "_batch" => "_batch_strided") end if library == "blas" # Out-of-place variants of trsm and trmm if occursin("trsm", header) && occursin("ldc", header) copy_header = replace(copy_header, "trsm" => "trsm_variant") end if occursin("trmm", header) && occursin("ldc", header) copy_header = replace(copy_header, "trmm" => "trmm_variant") end copy_header = replace(copy_header, "compute_mode mode," => "") copy_header = replace(copy_header, ", compute_mode mode)" => ")") copy_header = replace(copy_header, "value_or_pointer" => "float _Complex") copy_header = replace(copy_header, "value_or_pointer" => "double _Complex") copy_header = replace(copy_header, "value_or_pointer" => "short") 
copy_header = replace(copy_header, "value_or_pointer" => "float") copy_header = replace(copy_header, "value_or_pointer" => "double") end push!(signatures, (copy_header, name_routine, version, type_routine, template)) end end end end # Check the number of methods blacklist = String[] for name_routine in keys(routines) if (routines[name_routine] > 4) if occursin("set_csr_data", name_routine) || occursin("set_coo_data", name_routine) || occursin("_batch", name_routine) if (routines[name_routine] > 8) @warn "The routine $(name_routine) has $(routines[name_routine]) and will not be interfaced." push!(blacklist, name_routine) end else @warn "The routine $(name_routine) has $(routines[name_routine]) and will not be interfaced." push!(blacklist, name_routine) end end end path_oneapi_headers = joinpath(@__DIR__, output) oneapi_headers = open(path_oneapi_headers, "w") for (header, name_routine, version, type_routine, template) in signatures # Blacklist (name_routine in blacklist) && continue # Pass scalars (e.g. 
alpha/beta inputs) as references instead of values for type in ("short", "float", "double", "float _Complex", "double _Complex") header = replace(header, Regex("$type ([A-Za-z0-9]+(?![^,]*[_*]))[^,]*,") => SubstitutionString("$type $pattern\\1,")) header = replace(header, Regex(", $type ([A-Za-z0-9)]+(?![^,]*[_*]))[^,]*") => SubstitutionString(", $type $pattern\\1")) end push!(signatures2, (header, name_routine, version, type_routine, template)) pos = findfirst('(', header) fun = split(header, " ") len = 0 for (i, part) in enumerate(fun) len += length(part) if len ≤ 90 (i ≠ 1) && write(oneapi_headers, " ") write(oneapi_headers, part) else write(oneapi_headers, "\n") for i = 1:pos write(oneapi_headers, " ") end write(oneapi_headers, part) len = pos + length(part) end end write(oneapi_headers, ";\n\n") end close(oneapi_headers) return signatures2 end function generate_cpp(library::String, filename::Vector{String}, output::String; pattern::String="") signatures = generate_headers(library, filename, output; pattern) path_oneapi_cpp = joinpath(@__DIR__, output) oneapi_cpp = open(path_oneapi_cpp, "w") for (header, name, version, type_routine, template) in signatures parameters = split(header, "(")[2] parameters = split(parameters, ")")[1] parameters = replace(parameters, "syclQueue_t device_queue" => "device_queue->val") parameters = replace(parameters, "int32_t* " => "") parameters = replace(parameters, "int32_t " => "") parameters = replace(parameters, "int64_t* " => "") parameters = replace(parameters, "int64_t " => "") parameters = replace(parameters, "matrix_handle_t *" => "(oneapi::mkl::sparse::matrix_handle_t*) ") parameters = replace(parameters, "matrix_handle_t " => "(oneapi::mkl::sparse::matrix_handle_t) ") parameters = replace(parameters, "matmat_descr_t *" => "(oneapi::mkl::sparse::matmat_descr_t*) ") parameters = replace(parameters, "matmat_descr_t " => "(oneapi::mkl::sparse::matmat_descr_t) ") parameters = replace(parameters, "omatadd_descr_t *" => 
"(oneapi::mkl::sparse::omatadd_descr_t*) ") parameters = replace(parameters, "omatadd_descr_t " => "(oneapi::mkl::sparse::omatadd_descr_t) ") parameters = replace(parameters, "omatconvert_descr_t *" => "(oneapi::mkl::sparse::omatconvert_descr_t*) ") parameters = replace(parameters, "omatconvert_descr_t " => "(oneapi::mkl::sparse::omatconvert_descr_t) ") parameters = replace(parameters, "short **" => "reinterpret_cast") parameters = replace(parameters, "float _Complex **" => "reinterpret_cast **>") parameters = replace(parameters, "double _Complex **" => "reinterpret_cast **>") parameters = replace(parameters, "short *" => "reinterpret_cast") parameters = replace(parameters, "float _Complex *" => "reinterpret_cast *>") parameters = replace(parameters, "double _Complex *" => "reinterpret_cast *>") parameters = replace(parameters, "short " => "sycl::bit_cast") parameters = replace(parameters, "float _Complex " => "static_cast >") parameters = replace(parameters, "double _Complex " => "static_cast >") parameters = replace(parameters, ", float *" => ", ") parameters = replace(parameters, ", double *" => ", ") parameters = replace(parameters, ", float " => ", ") parameters = replace(parameters, ", double " => ", ") parameters = replace(parameters, ", **" => ", ") parameters = replace(parameters, ", *" => ", ") parameters = replace(parameters, "onemklTranspose *trans," => "convert(trans, group_count),") parameters = replace(parameters, "onemklTranspose* trans," => "convert(trans, group_count),") parameters = replace(parameters, "onemklUplo *uplo," => "convert(uplo, group_count),") parameters = replace(parameters, "onemklUplo* uplo," => "convert(uplo, group_count),") parameters = replace(parameters, "onemklDiag *diag," => "convert(diag, group_count),") parameters = replace(parameters, "onemklDiag* diag," => "convert(diag, group_count),") parameters = replace(parameters, "onemklSide *side," => "convert(side, group_count),") parameters = replace(parameters, "onemklSide* 
side," => "convert(side, group_count),") for type in ("onemklTranspose", "onemklSide", "onemklUplo", "onemklDiag", "onemklGenerate", "onemklLayout", "onemklJob", "onemklJobsvd", "onemklCompz", "onemklRangev", "onemklIndex", "onemklProperty", "onemklMatrixView", "onemklMatmatRequest", "onemklOmatconvertAlg", "onemklOmataddAlg") parameters = replace(parameters, Regex("$type ([A-Za-z0-9_]+),") => SubstitutionString("convert(\\1),")) parameters = replace(parameters, Regex(", $type ([A-Za-z0-9_]+)") => SubstitutionString(", convert(\\1)")) end # Pass scalars (e.g. alpha/beta inputs) as references instead of values header = replace(header, "§" => "*") parameters = replace(parameters, ", §" => ", *") parameters = replace(parameters, ", sycl::bit_cast§" => ", *reinterpret_cast") parameters = replace(parameters, ", static_cast >§" => ", *reinterpret_cast *>") parameters = replace(parameters, ", static_cast >§" => ", *reinterpret_cast *>") parameters = replace(parameters, r"half>([A-Za-z0-9_]+)" => s"half>(\1)") parameters = replace(parameters, r" >([A-Za-z0-9_]+)" => s" >(\1)") parameters = replace(parameters, r" \*>([A-Za-z0-9_]+)" => s"*>(\1)") parameters = replace(parameters, r" \*\*>([A-Za-z0-9_]+)" => s"**>(\1)") variant = "" if library == "blas" variant = "column_major::" end # Build catch clause: LAPACK functions also catch computation_error for info lapack_catch = "catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; }" sycl_catch = "catch (const sycl::exception& e) { return -1; }" write(oneapi_cpp, "extern \"C\" $header {\n") if template type = version_types[version] if !occursin("scratchpad_size", name) catch_clause = library == "lapack" ? 
lapack_catch : sycl_catch write(oneapi_cpp, " try {\n") write(oneapi_cpp, " auto status = oneapi::mkl::$library::$variant$name<$type>($parameters, {});\n") write(oneapi_cpp, " device_queue->val.wait_and_throw();\n") write(oneapi_cpp, " } $catch_clause\n") end if occursin("scratchpad_size", name) write(oneapi_cpp, " int64_t scratchpad_size = oneapi::mkl::$library::$variant$name<$type>($parameters);\n device_queue->val.wait_and_throw();\n") end else if !(name ∈ void_output) has_queue = occursin("device_queue", parameters) is_scratchpad = occursin("scratchpad_size", name) if has_queue && !is_scratchpad catch_clause = library == "lapack" ? lapack_catch : sycl_catch write(oneapi_cpp, " try {\n") write(oneapi_cpp, " auto status = oneapi::mkl::$library::$variant$name($parameters, {});\n") write(oneapi_cpp, " device_queue->val.wait_and_throw();\n") write(oneapi_cpp, " } $catch_clause\n") else write(oneapi_cpp, " auto status = oneapi::mkl::$library::$variant$name($parameters, {});\n") if has_queue write(oneapi_cpp, " device_queue->val.wait_and_throw();\n") end end else if occursin("device_queue", parameters) write(oneapi_cpp, " try {\n") write(oneapi_cpp, " oneapi::mkl::$library::$variant$name($parameters);\n") write(oneapi_cpp, " device_queue->val.wait_and_throw();\n") write(oneapi_cpp, " } $sycl_catch\n") else write(oneapi_cpp, " oneapi::mkl::$library::$variant$name($parameters);\n") end end end if occursin("scratchpad_size", name) write(oneapi_cpp, " return scratchpad_size;\n") else write(oneapi_cpp, " return 0;\n") end write(oneapi_cpp, "}") write(oneapi_cpp, "\n\n") end close(oneapi_cpp) end # Generate "src/onemkl.h" generate_headers("blas", blas, "onemkl_blas.h", pattern="*") generate_headers("lapack", lapack, "onemkl_lapack.h", pattern="*") generate_headers("sparse", sparse, "onemkl_sparse.h", pattern="*") io = open("src/onemkl.h", "w") headers_prologue = read("onemkl_prologue.h", String) write(io, headers_prologue) headers_blas = read("onemkl_blas.h", String) 
write(io, "// BLAS\n") write(io, headers_blas) headers_lapack = read("onemkl_lapack.h", String) write(io, "// LAPACK\n") write(io, headers_lapack) headers_sparse = read("onemkl_sparse.h", String) write(io, "// SPARSE\n") write(io, headers_sparse) headers_epilogue = read("onemkl_epilogue.h", String) write(io, headers_epilogue) close(io) # Add the version of oneMKL in src/onemkl.h headers_onemkl = read("src/onemkl.h", String) version_onemkl = pkgversion(oneAPI_Support_Headers_jll) headers_onemkl = replace(headers_onemkl, "void onemkl_version" => "const int64_t ONEMKL_VERSION_MAJOR = $(version_onemkl.major);\nconst int64_t ONEMKL_VERSION_MINOR = $(version_onemkl.minor);\nconst int64_t ONEMKL_VERSION_PATCH = $(version_onemkl.patch);\nvoid onemkl_version") write("src/onemkl.h", headers_onemkl) # Generate "src/onemkl.cpp" generate_cpp("blas", blas, "onemkl_blas.cpp", pattern="§") generate_cpp("lapack", lapack, "onemkl_lapack.cpp", pattern="§") generate_cpp("sparse", sparse, "onemkl_sparse.cpp", pattern="§") io = open("src/onemkl.cpp", "w") cpp_prologue = read("onemkl_prologue.cpp", String) write(io, cpp_prologue) cpp_blas = read("onemkl_blas.cpp", String) write(io, "// BLAS\n") write(io, cpp_blas) cpp_lapack = read("onemkl_lapack.cpp", String) write(io, "// LAPACK\n") write(io, cpp_lapack) cpp_sparse = read("onemkl_sparse.cpp", String) write(io, "// SPARSE\n") write(io, cpp_sparse) cpp_epilogue = read("onemkl_epilogue.cpp", String) write(io, cpp_epilogue) close(io) ================================================ FILE: deps/onemkl_epilogue.cpp ================================================ extern "C" int onemklXsparse_matmat(syclQueue_t device_queue, matrix_handle_t A, matrix_handle_t B, matrix_handle_t C, onemklMatmatRequest req, matmat_descr_t descr, int64_t *sizeTempBuffer, void *tempBuffer) { auto status = oneapi::mkl::sparse::matmat(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) A, (oneapi::mkl::sparse::matrix_handle_t) B, 
(oneapi::mkl::sparse::matrix_handle_t) C, convert(req), (oneapi::mkl::sparse::matmat_descr_t) descr, sizeTempBuffer, tempBuffer, {}); device_queue->val.wait_and_throw(); return 0; } // other // oneMKL keeps a cache of SYCL queues and tries to destroy them when unloading the library. // that is incompatible with oneAPI.jl destroying queues before that, so call mkl_free_buffers // to manually wipe the device cache when we're destroying queues. extern "C" int onemklDestroy() { mkl_free_buffers(); return 0; } ================================================ FILE: deps/onemkl_epilogue.h ================================================ int onemklXsparse_matmat(syclQueue_t device_queue, matrix_handle_t A, matrix_handle_t B, matrix_handle_t C, onemklMatmatRequest req, matmat_descr_t descr, int64_t *sizeTempBuffer, void *tempBuffer); int onemklDestroy(void); #ifdef __cplusplus } #endif ================================================ FILE: deps/onemkl_prologue.cpp ================================================ #include "onemkl.h" #include "sycl.hpp" #include #include #include #include oneapi::mkl::transpose convert(onemklTranspose val) { switch (val) { case ONEMKL_TRANSPOSE_NONTRANS: return oneapi::mkl::transpose::nontrans; case ONEMKL_TRANSPOSE_TRANS: return oneapi::mkl::transpose::trans; case ONEMLK_TRANSPOSE_CONJTRANS: return oneapi::mkl::transpose::conjtrans; } } oneapi::mkl::transpose* convert(const onemklTranspose* vals, int64_t size) { oneapi::mkl::transpose* result = new oneapi::mkl::transpose[size]; for (int64_t i = 0; i < size; ++i) { switch (vals[i]) { case ONEMKL_TRANSPOSE_NONTRANS: result[i] = oneapi::mkl::transpose::nontrans; break; case ONEMKL_TRANSPOSE_TRANS: result[i] = oneapi::mkl::transpose::trans; break; case ONEMLK_TRANSPOSE_CONJTRANS: result[i] = oneapi::mkl::transpose::conjtrans; break; } } return result; } oneapi::mkl::uplo convert(onemklUplo val) { switch(val) { case ONEMKL_UPLO_UPPER: return oneapi::mkl::uplo::upper; case ONEMKL_UPLO_LOWER: 
return oneapi::mkl::uplo::lower; } } oneapi::mkl::uplo* convert(const onemklUplo* vals, int64_t size) { oneapi::mkl::uplo* result = new oneapi::mkl::uplo[size]; for (int64_t i = 0; i < size; ++i) { switch (vals[i]) { case ONEMKL_UPLO_UPPER: result[i] = oneapi::mkl::uplo::upper; break; case ONEMKL_UPLO_LOWER: result[i] = oneapi::mkl::uplo::lower; break; } } return result; } oneapi::mkl::diag convert(onemklDiag val) { switch(val) { case ONEMKL_DIAG_NONUNIT: return oneapi::mkl::diag::nonunit; case ONEMKL_DIAG_UNIT: return oneapi::mkl::diag::unit; } } oneapi::mkl::diag* convert(const onemklDiag* vals, int64_t size) { oneapi::mkl::diag* result = new oneapi::mkl::diag[size]; for (int64_t i = 0; i < size; ++i) { switch (vals[i]) { case ONEMKL_DIAG_NONUNIT: result[i] = oneapi::mkl::diag::nonunit; break; case ONEMKL_DIAG_UNIT: result[i] = oneapi::mkl::diag::unit; break; } } return result; } oneapi::mkl::side convert(onemklSide val) { switch (val) { case ONEMKL_SIDE_LEFT: return oneapi::mkl::side::left; case ONEMKL_SIDE_RIGHT: return oneapi::mkl::side::right; } } oneapi::mkl::side* convert(const onemklSide* vals, int64_t size) { oneapi::mkl::side* result = new oneapi::mkl::side[size]; for (int64_t i = 0; i < size; ++i) { switch (vals[i]) { case ONEMKL_SIDE_LEFT: result[i] = oneapi::mkl::side::left; break; case ONEMKL_SIDE_RIGHT: result[i] = oneapi::mkl::side::right; break; } } return result; } oneapi::mkl::offset convert(onemklOffset val) { switch (val) { case ONEMKL_OFFSET_ROW: return oneapi::mkl::offset::row; case ONEMKL_OFFSET_COL: return oneapi::mkl::offset::column; case ONEMKL_OFFSET_FIX: return oneapi::mkl::offset::fix; } } oneapi::mkl::job convert(onemklJob val) { switch (val) { case ONEMKL_JOB_N: return oneapi::mkl::job::N; case ONEMKL_JOB_V: return oneapi::mkl::job::V; case ONEMKL_JOB_U: return oneapi::mkl::job::U; case ONEMKL_JOB_A: return oneapi::mkl::job::A; case ONEMKL_JOB_S: return oneapi::mkl::job::S; case ONEMKL_JOB_O: return oneapi::mkl::job::O; } } 
// LAPACK job/option converters: 1:1 maps from the C ABI enums (onemkl.h) onto the
// oneMKL C++ enum classes. Like the BLAS converters above, the switches cover all
// declared enumerators and have no default.
oneapi::mkl::generate convert(onemklGenerate val) {
    switch (val) {
    case ONEMKL_GENERATE_Q:
        return oneapi::mkl::generate::Q;
    case ONEMKL_GENERATE_P:
        return oneapi::mkl::generate::P;
    case ONEMKL_GENERATE_N:
        return oneapi::mkl::generate::N;
    case ONEMKL_GENERATE_V:
        return oneapi::mkl::generate::V;
    }
}

oneapi::mkl::compz convert(onemklCompz val) {
    switch (val) {
    case ONEMKL_COMPZ_N:
        return oneapi::mkl::compz::N;
    case ONEMKL_COMPZ_V:
        return oneapi::mkl::compz::V;
    case ONEMKL_COMPZ_I:
        return oneapi::mkl::compz::I;
    }
}

oneapi::mkl::direct convert(onemklDirect val) {
    switch (val) {
    case ONEMKL_DIRECT_F:
        return oneapi::mkl::direct::F;
    case ONEMKL_DIRECT_B:
        return oneapi::mkl::direct::B;
    }
}

oneapi::mkl::storev convert(onemklStorev val) {
    switch (val) {
    case ONEMKL_STOREV_C:
        return oneapi::mkl::storev::C;
    case ONEMKL_STOREV_R:
        return oneapi::mkl::storev::R;
    }
}

oneapi::mkl::rangev convert(onemklRangev val) {
    switch (val) {
    case ONEMKL_RANGEV_A:
        return oneapi::mkl::rangev::A;
    case ONEMKL_RANGEV_V:
        return oneapi::mkl::rangev::V;
    case ONEMKL_RANGEV_I:
        return oneapi::mkl::rangev::I;
    }
}

oneapi::mkl::order convert(onemklOrder val) {
    switch (val) {
    case ONEMKL_ORDER_B:
        return oneapi::mkl::order::B;
    case ONEMKL_ORDER_E:
        return oneapi::mkl::order::E;
    }
}

oneapi::mkl::jobsvd convert(onemklJobsvd val) {
    switch (val) {
    case ONEMKL_JOBSVD_N:
        return oneapi::mkl::jobsvd::N;
    case ONEMKL_JOBSVD_A:
        return oneapi::mkl::jobsvd::A;
    case ONEMKL_JOBSVD_O:
        return oneapi::mkl::jobsvd::O;
    case ONEMKL_JOBSVD_S:
        return oneapi::mkl::jobsvd::S;
    }
}

oneapi::mkl::layout convert(onemklLayout val) {
    switch (val) {
    case ONEMKL_LAYOUT_ROW:
        return oneapi::mkl::layout::row_major;
    case ONEMKL_LAYOUT_COL:
        return oneapi::mkl::layout::col_major;
    }
}

oneapi::mkl::index_base convert(onemklIndex val) {
    switch (val) {
    case ONEMKL_INDEX_ZERO:
        return oneapi::mkl::index_base::zero;
    case ONEMKL_INDEX_ONE:
        return oneapi::mkl::index_base::one;
    }
}

// Sparse-domain converters.
oneapi::mkl::sparse::property convert(onemklProperty val) {
    switch (val) {
    case ONEMKL_PROPERTY_SYMMETRIC:
        return oneapi::mkl::sparse::property::symmetric;
    case ONEMKL_PROPERTY_SORTED:
        return oneapi::mkl::sparse::property::sorted;
    }
}

oneapi::mkl::sparse::matrix_view_descr convert(onemklMatrixView val) {
    switch (val) {
    case ONEMKL_MATRIX_VIEW_GENERAL:
        return oneapi::mkl::sparse::matrix_view_descr::general;
    }
}

// Stages of the multi-call sparse matmat workflow (see onemklXsparse_matmat).
oneapi::mkl::sparse::matmat_request convert(onemklMatmatRequest val) {
    switch (val) {
    case ONEMKL_MATMAT_REQUEST_GET_WORK_ESTIMATION_BUF_SIZE:
        return oneapi::mkl::sparse::matmat_request::get_work_estimation_buf_size;
    case ONEMKL_MATMAT_REQUEST_WORK_ESTIMATION:
        return oneapi::mkl::sparse::matmat_request::work_estimation;
    case ONEMKL_MATMAT_REQUEST_GET_COMPUTE_STRUCTURE_BUF_SIZE:
        return oneapi::mkl::sparse::matmat_request::get_compute_structure_buf_size;
    case ONEMKL_MATMAT_REQUEST_COMPUTE_STRUCTURE:
        return oneapi::mkl::sparse::matmat_request::compute_structure;
    case ONEMKL_MATMAT_REQUEST_FINALIZE_STRUCTURE:
        return oneapi::mkl::sparse::matmat_request::finalize_structure;
    case ONEMKL_MATMAT_REQUEST_GET_COMPUTE_BUF_SIZE:
        return oneapi::mkl::sparse::matmat_request::get_compute_buf_size;
    case ONEMKL_MATMAT_REQUEST_COMPUTE:
        return oneapi::mkl::sparse::matmat_request::compute;
    case ONEMKL_MATMAT_REQUEST_GET_NNZ:
        return oneapi::mkl::sparse::matmat_request::get_nnz;
    case ONEMKL_MATMAT_REQUEST_FINALIZE:
        return oneapi::mkl::sparse::matmat_request::finalize;
    }
}

oneapi::mkl::sparse::omatconvert_alg convert(onemklOmatconvertAlg val) {
    switch (val) {
    case ONEMKL_OMATCONVERT_DEFAULT_ALG:
        return oneapi::mkl::sparse::omatconvert_alg::default_alg;
    }
}

oneapi::mkl::sparse::omatadd_alg convert(onemklOmataddAlg val) {
    switch (val) {
    case ONEMKL_OMATADD_DEFAULT_ALG:
        return oneapi::mkl::sparse::omatadd_alg::default_alg;
    }
}

// version

// Report the oneMKL version the wrapper was built against. The three constants
// are injected into src/onemkl.h by deps/generate_interfaces.jl.
extern "C" void onemkl_version(int64_t *major, int64_t *minor, int64_t *patch) {
    *major = ONEMKL_VERSION_MAJOR;
    *minor = ONEMKL_VERSION_MINOR;
    *patch = ONEMKL_VERSION_PATCH;
    return;
}

// gemm
//
https://spec.oneapi.io/versions/1.0-rev-1/elements/oneMKL/source/domains/blas/gemm.html class gemmBatchInfo { public: oneapi::mkl::transpose *m_transa = nullptr; oneapi::mkl::transpose *m_transb = nullptr; sycl::device m_device; sycl::context m_context; oneapi::mkl::transpose m_ta; oneapi::mkl::transpose m_tb; // Constructor gemmBatchInfo(syclQueue_t device_queue, int64_t group_count, onemklTranspose transa, onemklTranspose transb) { // Get device and context info from device_queue auto main_queue = device_queue->val; m_device = main_queue.get_device(); m_context = main_queue.get_context(); // Allocate transpose shared buffers try { m_transa = (oneapi::mkl::transpose *) malloc_shared(group_count * sizeof(oneapi::mkl::transpose), m_device, m_context); m_transb = (oneapi::mkl::transpose *) malloc_shared(group_count * sizeof(oneapi::mkl::transpose), m_device, m_context); m_ta = convert(transa); m_tb = convert(transb); } catch(const std::bad_alloc& e) { std::cerr << "Error: " << e.what() << std::endl; } // Initialize for (int i = 0; i < group_count; i++) { m_transa[i] = m_ta; m_transb[i] = m_tb; } }; // Destructor ~gemmBatchInfo() { free(m_transa, m_context); free(m_transb, m_context); } }; class trsmBatchInfo { public: oneapi::mkl::transpose *m_transa = nullptr; oneapi::mkl::side *m_leftright = nullptr; oneapi::mkl::uplo *m_upperlower = nullptr; oneapi::mkl::diag *m_unitdiag = nullptr; sycl::device m_device; sycl::context m_context; oneapi::mkl::transpose m_ta; oneapi::mkl::side m_side; oneapi::mkl::uplo m_uplo; oneapi::mkl::diag m_diag; // Constructor trsmBatchInfo(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose transa, onemklDiag unit_diag, int64_t group_count) { // Get device and context info from device_queue auto main_queue = device_queue->val; m_device = main_queue.get_device(); m_context = main_queue.get_context(); try { // Allocate uniform arrays of group_size and transpose_a, transpose_b supporting oneMKL // gemm_batch 
API m_transa = (oneapi::mkl::transpose *) malloc_shared(group_count * sizeof(oneapi::mkl::transpose), m_device, m_context); m_leftright = (oneapi::mkl::side *) malloc_shared(group_count * sizeof(oneapi::mkl::side), m_device, m_context); m_upperlower = (oneapi::mkl::uplo *) malloc_shared(group_count * sizeof(oneapi::mkl::uplo), m_device, m_context); m_unitdiag = (oneapi::mkl::diag *) malloc_shared(group_count * sizeof(oneapi::mkl::diag), m_device, m_context); m_ta = convert(transa); m_side = convert(left_right); m_uplo = convert(upper_lower); m_diag = convert(unit_diag); } catch(const std::bad_alloc& e) { std::cerr << "Error: " << e.what() << std::endl; } // Initialize for (int i = 0; i < group_count; i++) { m_transa[i] = m_ta; m_leftright[i] = m_side; m_upperlower[i] = m_uplo; m_unitdiag[i] = m_diag; } }; // Destructor ~trsmBatchInfo() { free(m_transa, m_context); free(m_upperlower, m_context); free(m_unitdiag, m_context); free(m_leftright, m_context); } }; extern "C" int onemklHgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, uint16_t *alpha, const short **a, int64_t *lda, const short **b, int64_t *ldb, uint16_t *beta, short **c, int64_t *ldc, int64_t group_count, int64_t *group_size) { gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb); device_queue->val.wait_and_throw(); auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, &gemmInfo.m_transa[0], &gemmInfo.m_transb[0], m, n, k, reinterpret_cast(alpha), reinterpret_cast(&a[0]), lda, reinterpret_cast(&b[0]), ldb, reinterpret_cast(beta), reinterpret_cast(&c[0]), ldc, group_count, group_size, {}); device_queue->val.wait_and_throw(); return 0; } extern "C" int onemklSgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, float *alpha, const float **a, int64_t *lda, const float **b, int64_t *ldb, float *beta, float **c, int64_t *ldc, int64_t 
group_count, int64_t *group_size) { gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb); device_queue->val.wait_and_throw(); auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, &gemmInfo.m_transa[0], &gemmInfo.m_transb[0], m, n, k, alpha, (const float **)&a[0], lda, (const float **)&b[0], ldb, beta, &c[0], ldc, group_count, group_size, {}); device_queue->val.wait_and_throw(); return 0; } extern "C" int onemklDgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, double *alpha, const double **a, int64_t *lda, const double **b, int64_t *ldb, double *beta, double **c, int64_t *ldc, int64_t group_count, int64_t *group_size) { gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb); device_queue->val.wait_and_throw(); auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, &gemmInfo.m_transa[0], &gemmInfo.m_transb[0], m, n, k, alpha, (const double **)&a[0], lda, (const double **)&b[0], ldb, beta, &c[0], ldc, group_count, group_size, {}); device_queue->val.wait_and_throw(); return 0; } extern "C" int onemklCgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, float _Complex *alpha, const float _Complex **a, int64_t *lda, const float _Complex **b, int64_t *ldb, float _Complex *beta, float _Complex **c, int64_t *ldc, int64_t group_count, int64_t *group_size) { gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb); device_queue->val.wait_and_throw(); auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, &gemmInfo.m_transa[0], &gemmInfo.m_transb[0], m, n, k, reinterpret_cast *>(alpha), reinterpret_cast **>(&a[0]), lda, reinterpret_cast **>(&b[0]), ldb, reinterpret_cast *>(beta), reinterpret_cast **>(&c[0]), ldc, group_count, group_size, {}); device_queue->val.wait_and_throw(); return 0; } extern "C" int onemklZgemm_batch(syclQueue_t 
device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, double _Complex *alpha, const double _Complex **a, int64_t *lda, const double _Complex **b, int64_t *ldb, double _Complex *beta, double _Complex **c, int64_t *ldc, int64_t group_count, int64_t *group_size) { gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb); device_queue->val.wait_and_throw(); auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, &gemmInfo.m_transa[0], &gemmInfo.m_transb[0], m, n, k, reinterpret_cast *>(alpha), reinterpret_cast **>(&a[0]), lda, reinterpret_cast **>(&b[0]), ldb, reinterpret_cast *>(beta), reinterpret_cast **>(&c[0]), ldc, group_count, group_size, {}); device_queue->val.wait_and_throw(); return 0; } extern "C" int onemklStrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose transa, onemklDiag unit_diag, int64_t *m, int64_t *n, float *alpha, const float **a, int64_t *lda, float **b, int64_t *ldb, int64_t group_count, int64_t *group_size) { trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa, unit_diag, group_count); device_queue->val.wait_and_throw(); auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0], &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0], m, n, alpha, (const float **)&a[0], lda, &b[0], ldb, group_count, group_size, {}); device_queue->val.wait_and_throw(); return 0; } extern "C" int onemklDtrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose transa, onemklDiag unit_diag, int64_t *m, int64_t *n, double *alpha, const double **a, int64_t *lda, double **b, int64_t *ldb, int64_t group_count, int64_t *group_size) { trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa, unit_diag, group_count); device_queue->val.wait_and_throw(); auto status = 
oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0], &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0], m, n, alpha, (const double **)&a[0], lda, &b[0], ldb, group_count, group_size, {}); device_queue->val.wait_and_throw(); return 0; } extern "C" int onemklCtrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose transa, onemklDiag unit_diag, int64_t *m, int64_t *n, float _Complex *alpha, const float _Complex **a, int64_t *lda, float _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_size) { trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa, unit_diag, group_count); device_queue->val.wait_and_throw(); auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0], &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0], m, n, reinterpret_cast *>(alpha), reinterpret_cast **>(&a[0]), lda, reinterpret_cast **>(&b[0]), ldb, group_count, group_size, {}); device_queue->val.wait_and_throw(); return 0; } extern "C" int onemklZtrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose transa, onemklDiag unit_diag, int64_t *m, int64_t *n, double _Complex *alpha, const double _Complex **a, int64_t *lda, double _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_size) { trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa, unit_diag, group_count); device_queue->val.wait_and_throw(); auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0], &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0], m, n, reinterpret_cast *>(alpha), reinterpret_cast **>(&a[0]), lda, reinterpret_cast **>(&b[0]), ldb, group_count, group_size, {}); device_queue->val.wait_and_throw(); return 0; } ================================================ FILE: deps/onemkl_prologue.h 
================================================ #pragma once #include "sycl.h" #include #include #ifdef __cplusplus extern "C" { #endif // BLAS types typedef enum { ONEMKL_TRANSPOSE_NONTRANS, ONEMKL_TRANSPOSE_TRANS, ONEMLK_TRANSPOSE_CONJTRANS } onemklTranspose; typedef enum { ONEMKL_UPLO_UPPER, ONEMKL_UPLO_LOWER } onemklUplo; typedef enum { ONEMKL_DIAG_NONUNIT, ONEMKL_DIAG_UNIT } onemklDiag; typedef enum { ONEMKL_SIDE_LEFT, ONEMKL_SIDE_RIGHT } onemklSide; typedef enum { ONEMKL_OFFSET_ROW, ONEMKL_OFFSET_COL, ONEMKL_OFFSET_FIX, } onemklOffset; // LAPACK types typedef enum { ONEMKL_JOB_N, ONEMKL_JOB_V, ONEMKL_JOB_U, ONEMKL_JOB_A, ONEMKL_JOB_S, ONEMKL_JOB_O } onemklJob; typedef enum { ONEMKL_GENERATE_Q, ONEMKL_GENERATE_P, ONEMKL_GENERATE_N, ONEMKL_GENERATE_V } onemklGenerate; typedef enum { ONEMKL_COMPZ_N, ONEMKL_COMPZ_V, ONEMKL_COMPZ_I } onemklCompz; typedef enum { ONEMKL_DIRECT_F, ONEMKL_DIRECT_B } onemklDirect; typedef enum { ONEMKL_STOREV_C, ONEMKL_STOREV_R } onemklStorev; typedef enum { ONEMKL_RANGEV_A, ONEMKL_RANGEV_V, ONEMKL_RANGEV_I } onemklRangev; typedef enum { ONEMKL_ORDER_B, ONEMKL_ORDER_E } onemklOrder; typedef enum { ONEMKL_JOBSVD_N, ONEMKL_JOBSVD_A, ONEMKL_JOBSVD_O, ONEMKL_JOBSVD_S } onemklJobsvd; typedef enum { ONEMKL_LAYOUT_ROW, ONEMKL_LAYOUT_COL, } onemklLayout; typedef enum { ONEMKL_INDEX_ZERO, ONEMKL_INDEX_ONE, } onemklIndex; // SPARSE types typedef enum { ONEMKL_PROPERTY_SYMMETRIC, ONEMKL_PROPERTY_SORTED, } onemklProperty; typedef enum { ONEMKL_MATRIX_VIEW_GENERAL, } onemklMatrixView; typedef enum { ONEMKL_MATMAT_REQUEST_GET_WORK_ESTIMATION_BUF_SIZE, ONEMKL_MATMAT_REQUEST_WORK_ESTIMATION, ONEMKL_MATMAT_REQUEST_GET_COMPUTE_STRUCTURE_BUF_SIZE, ONEMKL_MATMAT_REQUEST_COMPUTE_STRUCTURE, ONEMKL_MATMAT_REQUEST_FINALIZE_STRUCTURE, ONEMKL_MATMAT_REQUEST_GET_COMPUTE_BUF_SIZE, ONEMKL_MATMAT_REQUEST_COMPUTE, ONEMKL_MATMAT_REQUEST_GET_NNZ, ONEMKL_MATMAT_REQUEST_FINALIZE, } onemklMatmatRequest; typedef enum { ONEMKL_OMATCONVERT_DEFAULT_ALG, } 
onemklOmatconvertAlg; typedef enum { ONEMKL_OMATADD_DEFAULT_ALG, } onemklOmataddAlg; struct matrix_handle; typedef struct matrix_handle *matrix_handle_t; struct matmat_descr; typedef struct matmat_descr *matmat_descr_t; struct omatconvert_descr; typedef struct omatconvert_descr *omatconvert_descr_t; struct omatadd_descr; typedef struct omatadd_descr *omatadd_descr_t; void onemkl_version(int64_t *major, int64_t *minor, int64_t *patch); int onemklHgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, uint16_t *alpha, const short **a, int64_t *lda, const short **b, int64_t *ldb, uint16_t *beta, short **c, int64_t *ldc, int64_t group_count, int64_t *group_size); int onemklSgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, float *alpha, const float **a, int64_t *lda, const float **b, int64_t *ldb, float *beta, float **c, int64_t *ldc, int64_t group_count, int64_t *group_size); int onemklDgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, double *alpha, const double **a, int64_t *lda, const double **b, int64_t *ldb, double *beta, double **c, int64_t *ldc, int64_t group_count, int64_t *group_size); int onemklCgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, float _Complex *alpha, const float _Complex **a, int64_t *lda, const float _Complex **b, int64_t *ldb, float _Complex *beta, float _Complex **c, int64_t *ldc, int64_t group_count, int64_t *group_size); int onemklZgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, double _Complex *alpha, const double _Complex **a, int64_t *lda, const double _Complex **b, int64_t *ldb, double _Complex *beta, double _Complex **c, int64_t *ldc, int64_t group_count, int64_t *group_size); int 
onemklStrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose transa, onemklDiag unit_diag, int64_t *m, int64_t *n, float *alpha, const float **a, int64_t *lda, float **b, int64_t *ldb, int64_t group_count, int64_t *group_size); int onemklDtrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose transa, onemklDiag unit_diag, int64_t *m, int64_t *n, double *alpha, const double **a, int64_t *lda, double **b, int64_t *ldb, int64_t group_count, int64_t *group_size); int onemklCtrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose transa, onemklDiag unit_diag, int64_t *m, int64_t *n, float _Complex *alpha, const float _Complex **a, int64_t *lda, float _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_size); int onemklZtrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose transa, onemklDiag unit_diag, int64_t *m, int64_t *n, double _Complex *alpha, const double _Complex **a, int64_t *lda, double _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_size); ================================================ FILE: deps/src/onemkl.cpp ================================================ #include "onemkl.h" #include "sycl.hpp" #include #include #include #include oneapi::mkl::transpose convert(onemklTranspose val) { switch (val) { case ONEMKL_TRANSPOSE_NONTRANS: return oneapi::mkl::transpose::nontrans; case ONEMKL_TRANSPOSE_TRANS: return oneapi::mkl::transpose::trans; case ONEMLK_TRANSPOSE_CONJTRANS: return oneapi::mkl::transpose::conjtrans; } } oneapi::mkl::transpose* convert(const onemklTranspose* vals, int64_t size) { oneapi::mkl::transpose* result = new oneapi::mkl::transpose[size]; for (int64_t i = 0; i < size; ++i) { switch (vals[i]) { case ONEMKL_TRANSPOSE_NONTRANS: result[i] = oneapi::mkl::transpose::nontrans; break; case ONEMKL_TRANSPOSE_TRANS: result[i] = 
oneapi::mkl::transpose::trans; break; case ONEMLK_TRANSPOSE_CONJTRANS: result[i] = oneapi::mkl::transpose::conjtrans; break; } } return result; } oneapi::mkl::uplo convert(onemklUplo val) { switch(val) { case ONEMKL_UPLO_UPPER: return oneapi::mkl::uplo::upper; case ONEMKL_UPLO_LOWER: return oneapi::mkl::uplo::lower; } } oneapi::mkl::uplo* convert(const onemklUplo* vals, int64_t size) { oneapi::mkl::uplo* result = new oneapi::mkl::uplo[size]; for (int64_t i = 0; i < size; ++i) { switch (vals[i]) { case ONEMKL_UPLO_UPPER: result[i] = oneapi::mkl::uplo::upper; break; case ONEMKL_UPLO_LOWER: result[i] = oneapi::mkl::uplo::lower; break; } } return result; } oneapi::mkl::diag convert(onemklDiag val) { switch(val) { case ONEMKL_DIAG_NONUNIT: return oneapi::mkl::diag::nonunit; case ONEMKL_DIAG_UNIT: return oneapi::mkl::diag::unit; } } oneapi::mkl::diag* convert(const onemklDiag* vals, int64_t size) { oneapi::mkl::diag* result = new oneapi::mkl::diag[size]; for (int64_t i = 0; i < size; ++i) { switch (vals[i]) { case ONEMKL_DIAG_NONUNIT: result[i] = oneapi::mkl::diag::nonunit; break; case ONEMKL_DIAG_UNIT: result[i] = oneapi::mkl::diag::unit; break; } } return result; } oneapi::mkl::side convert(onemklSide val) { switch (val) { case ONEMKL_SIDE_LEFT: return oneapi::mkl::side::left; case ONEMKL_SIDE_RIGHT: return oneapi::mkl::side::right; } } oneapi::mkl::side* convert(const onemklSide* vals, int64_t size) { oneapi::mkl::side* result = new oneapi::mkl::side[size]; for (int64_t i = 0; i < size; ++i) { switch (vals[i]) { case ONEMKL_SIDE_LEFT: result[i] = oneapi::mkl::side::left; break; case ONEMKL_SIDE_RIGHT: result[i] = oneapi::mkl::side::right; break; } } return result; } oneapi::mkl::offset convert(onemklOffset val) { switch (val) { case ONEMKL_OFFSET_ROW: return oneapi::mkl::offset::row; case ONEMKL_OFFSET_COL: return oneapi::mkl::offset::column; case ONEMKL_OFFSET_FIX: return oneapi::mkl::offset::fix; } } oneapi::mkl::job convert(onemklJob val) { switch (val) { case 
ONEMKL_JOB_N: return oneapi::mkl::job::N; case ONEMKL_JOB_V: return oneapi::mkl::job::V; case ONEMKL_JOB_U: return oneapi::mkl::job::U; case ONEMKL_JOB_A: return oneapi::mkl::job::A; case ONEMKL_JOB_S: return oneapi::mkl::job::S; case ONEMKL_JOB_O: return oneapi::mkl::job::O; } } oneapi::mkl::generate convert(onemklGenerate val) { switch (val) { case ONEMKL_GENERATE_Q: return oneapi::mkl::generate::Q; case ONEMKL_GENERATE_P: return oneapi::mkl::generate::P; case ONEMKL_GENERATE_N: return oneapi::mkl::generate::N; case ONEMKL_GENERATE_V: return oneapi::mkl::generate::V; } } oneapi::mkl::compz convert(onemklCompz val) { switch (val) { case ONEMKL_COMPZ_N: return oneapi::mkl::compz::N; case ONEMKL_COMPZ_V: return oneapi::mkl::compz::V; case ONEMKL_COMPZ_I: return oneapi::mkl::compz::I; } } oneapi::mkl::direct convert(onemklDirect val) { switch (val) { case ONEMKL_DIRECT_F: return oneapi::mkl::direct::F; case ONEMKL_DIRECT_B: return oneapi::mkl::direct::B; } } oneapi::mkl::storev convert(onemklStorev val) { switch (val) { case ONEMKL_STOREV_C: return oneapi::mkl::storev::C; case ONEMKL_STOREV_R: return oneapi::mkl::storev::R; } } oneapi::mkl::rangev convert(onemklRangev val) { switch (val) { case ONEMKL_RANGEV_A: return oneapi::mkl::rangev::A; case ONEMKL_RANGEV_V: return oneapi::mkl::rangev::V; case ONEMKL_RANGEV_I: return oneapi::mkl::rangev::I; } } oneapi::mkl::order convert(onemklOrder val) { switch (val) { case ONEMKL_ORDER_B: return oneapi::mkl::order::B; case ONEMKL_ORDER_E: return oneapi::mkl::order::E; } } oneapi::mkl::jobsvd convert(onemklJobsvd val) { switch (val) { case ONEMKL_JOBSVD_N: return oneapi::mkl::jobsvd::N; case ONEMKL_JOBSVD_A: return oneapi::mkl::jobsvd::A; case ONEMKL_JOBSVD_O: return oneapi::mkl::jobsvd::O; case ONEMKL_JOBSVD_S: return oneapi::mkl::jobsvd::S; } } oneapi::mkl::layout convert(onemklLayout val) { switch (val) { case ONEMKL_LAYOUT_ROW: return oneapi::mkl::layout::row_major; case ONEMKL_LAYOUT_COL: return 
oneapi::mkl::layout::col_major; } } oneapi::mkl::index_base convert(onemklIndex val) { switch (val) { case ONEMKL_INDEX_ZERO: return oneapi::mkl::index_base::zero; case ONEMKL_INDEX_ONE: return oneapi::mkl::index_base::one; } } oneapi::mkl::sparse::property convert(onemklProperty val) { switch (val) { case ONEMKL_PROPERTY_SYMMETRIC: return oneapi::mkl::sparse::property::symmetric; case ONEMKL_PROPERTY_SORTED: return oneapi::mkl::sparse::property::sorted; } } oneapi::mkl::sparse::matrix_view_descr convert(onemklMatrixView val) { switch (val) { case ONEMKL_MATRIX_VIEW_GENERAL: return oneapi::mkl::sparse::matrix_view_descr::general; } } oneapi::mkl::sparse::matmat_request convert(onemklMatmatRequest val) { switch (val) { case ONEMKL_MATMAT_REQUEST_GET_WORK_ESTIMATION_BUF_SIZE: return oneapi::mkl::sparse::matmat_request::get_work_estimation_buf_size; case ONEMKL_MATMAT_REQUEST_WORK_ESTIMATION: return oneapi::mkl::sparse::matmat_request::work_estimation; case ONEMKL_MATMAT_REQUEST_GET_COMPUTE_STRUCTURE_BUF_SIZE: return oneapi::mkl::sparse::matmat_request::get_compute_structure_buf_size; case ONEMKL_MATMAT_REQUEST_COMPUTE_STRUCTURE: return oneapi::mkl::sparse::matmat_request::compute_structure; case ONEMKL_MATMAT_REQUEST_FINALIZE_STRUCTURE: return oneapi::mkl::sparse::matmat_request::finalize_structure; case ONEMKL_MATMAT_REQUEST_GET_COMPUTE_BUF_SIZE: return oneapi::mkl::sparse::matmat_request::get_compute_buf_size; case ONEMKL_MATMAT_REQUEST_COMPUTE: return oneapi::mkl::sparse::matmat_request::compute; case ONEMKL_MATMAT_REQUEST_GET_NNZ: return oneapi::mkl::sparse::matmat_request::get_nnz; case ONEMKL_MATMAT_REQUEST_FINALIZE: return oneapi::mkl::sparse::matmat_request::finalize; } } oneapi::mkl::sparse::omatconvert_alg convert(onemklOmatconvertAlg val) { switch (val) { case ONEMKL_OMATCONVERT_DEFAULT_ALG: return oneapi::mkl::sparse::omatconvert_alg::default_alg; } } oneapi::mkl::sparse::omatadd_alg convert(onemklOmataddAlg val) { switch (val) { case 
ONEMKL_OMATADD_DEFAULT_ALG: return oneapi::mkl::sparse::omatadd_alg::default_alg; } } // version extern "C" void onemkl_version(int64_t *major, int64_t *minor, int64_t *patch) { *major = ONEMKL_VERSION_MAJOR; *minor = ONEMKL_VERSION_MINOR; *patch = ONEMKL_VERSION_PATCH; return; } // gemm // https://spec.oneapi.io/versions/1.0-rev-1/elements/oneMKL/source/domains/blas/gemm.html class gemmBatchInfo { public: oneapi::mkl::transpose *m_transa = nullptr; oneapi::mkl::transpose *m_transb = nullptr; sycl::device m_device; sycl::context m_context; oneapi::mkl::transpose m_ta; oneapi::mkl::transpose m_tb; // Constructor gemmBatchInfo(syclQueue_t device_queue, int64_t group_count, onemklTranspose transa, onemklTranspose transb) { // Get device and context info from device_queue auto main_queue = device_queue->val; m_device = main_queue.get_device(); m_context = main_queue.get_context(); // Allocate transpose shared buffers try { m_transa = (oneapi::mkl::transpose *) malloc_shared(group_count * sizeof(oneapi::mkl::transpose), m_device, m_context); m_transb = (oneapi::mkl::transpose *) malloc_shared(group_count * sizeof(oneapi::mkl::transpose), m_device, m_context); m_ta = convert(transa); m_tb = convert(transb); } catch(const std::bad_alloc& e) { std::cerr << "Error: " << e.what() << std::endl; } // Initialize for (int i = 0; i < group_count; i++) { m_transa[i] = m_ta; m_transb[i] = m_tb; } }; // Destructor ~gemmBatchInfo() { free(m_transa, m_context); free(m_transb, m_context); } }; class trsmBatchInfo { public: oneapi::mkl::transpose *m_transa = nullptr; oneapi::mkl::side *m_leftright = nullptr; oneapi::mkl::uplo *m_upperlower = nullptr; oneapi::mkl::diag *m_unitdiag = nullptr; sycl::device m_device; sycl::context m_context; oneapi::mkl::transpose m_ta; oneapi::mkl::side m_side; oneapi::mkl::uplo m_uplo; oneapi::mkl::diag m_diag; // Constructor trsmBatchInfo(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose transa, onemklDiag 
unit_diag, int64_t group_count) { // Get device and context info from device_queue auto main_queue = device_queue->val; m_device = main_queue.get_device(); m_context = main_queue.get_context(); try { // Allocate uniform arrays of group_size and transpose_a, transpose_b supporting oneMKL // gemm_batch API m_transa = (oneapi::mkl::transpose *) malloc_shared(group_count * sizeof(oneapi::mkl::transpose), m_device, m_context); m_leftright = (oneapi::mkl::side *) malloc_shared(group_count * sizeof(oneapi::mkl::side), m_device, m_context); m_upperlower = (oneapi::mkl::uplo *) malloc_shared(group_count * sizeof(oneapi::mkl::uplo), m_device, m_context); m_unitdiag = (oneapi::mkl::diag *) malloc_shared(group_count * sizeof(oneapi::mkl::diag), m_device, m_context); m_ta = convert(transa); m_side = convert(left_right); m_uplo = convert(upper_lower); m_diag = convert(unit_diag); } catch(const std::bad_alloc& e) { std::cerr << "Error: " << e.what() << std::endl; } // Initialize for (int i = 0; i < group_count; i++) { m_transa[i] = m_ta; m_leftright[i] = m_side; m_upperlower[i] = m_uplo; m_unitdiag[i] = m_diag; } }; // Destructor ~trsmBatchInfo() { free(m_transa, m_context); free(m_upperlower, m_context); free(m_unitdiag, m_context); free(m_leftright, m_context); } }; extern "C" int onemklHgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, uint16_t *alpha, const short **a, int64_t *lda, const short **b, int64_t *ldb, uint16_t *beta, short **c, int64_t *ldc, int64_t group_count, int64_t *group_size) { gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb); device_queue->val.wait_and_throw(); auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, &gemmInfo.m_transa[0], &gemmInfo.m_transb[0], m, n, k, reinterpret_cast(alpha), reinterpret_cast(&a[0]), lda, reinterpret_cast(&b[0]), ldb, reinterpret_cast(beta), reinterpret_cast(&c[0]), ldc, group_count, group_size, {}); 
device_queue->val.wait_and_throw(); return 0; } extern "C" int onemklSgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, float *alpha, const float **a, int64_t *lda, const float **b, int64_t *ldb, float *beta, float **c, int64_t *ldc, int64_t group_count, int64_t *group_size) { gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb); device_queue->val.wait_and_throw(); auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, &gemmInfo.m_transa[0], &gemmInfo.m_transb[0], m, n, k, alpha, (const float **)&a[0], lda, (const float **)&b[0], ldb, beta, &c[0], ldc, group_count, group_size, {}); device_queue->val.wait_and_throw(); return 0; } extern "C" int onemklDgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, double *alpha, const double **a, int64_t *lda, const double **b, int64_t *ldb, double *beta, double **c, int64_t *ldc, int64_t group_count, int64_t *group_size) { gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb); device_queue->val.wait_and_throw(); auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, &gemmInfo.m_transa[0], &gemmInfo.m_transb[0], m, n, k, alpha, (const double **)&a[0], lda, (const double **)&b[0], ldb, beta, &c[0], ldc, group_count, group_size, {}); device_queue->val.wait_and_throw(); return 0; } extern "C" int onemklCgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, float _Complex *alpha, const float _Complex **a, int64_t *lda, const float _Complex **b, int64_t *ldb, float _Complex *beta, float _Complex **c, int64_t *ldc, int64_t group_count, int64_t *group_size) { gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb); device_queue->val.wait_and_throw(); auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, &gemmInfo.m_transa[0], 
&gemmInfo.m_transb[0], m, n, k, reinterpret_cast *>(alpha), reinterpret_cast **>(&a[0]), lda, reinterpret_cast **>(&b[0]), ldb, reinterpret_cast *>(beta), reinterpret_cast **>(&c[0]), ldc, group_count, group_size, {}); device_queue->val.wait_and_throw(); return 0; } extern "C" int onemklZgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, double _Complex *alpha, const double _Complex **a, int64_t *lda, const double _Complex **b, int64_t *ldb, double _Complex *beta, double _Complex **c, int64_t *ldc, int64_t group_count, int64_t *group_size) { gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb); device_queue->val.wait_and_throw(); auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, &gemmInfo.m_transa[0], &gemmInfo.m_transb[0], m, n, k, reinterpret_cast *>(alpha), reinterpret_cast **>(&a[0]), lda, reinterpret_cast **>(&b[0]), ldb, reinterpret_cast *>(beta), reinterpret_cast **>(&c[0]), ldc, group_count, group_size, {}); device_queue->val.wait_and_throw(); return 0; } extern "C" int onemklStrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose transa, onemklDiag unit_diag, int64_t *m, int64_t *n, float *alpha, const float **a, int64_t *lda, float **b, int64_t *ldb, int64_t group_count, int64_t *group_size) { trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa, unit_diag, group_count); device_queue->val.wait_and_throw(); auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0], &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0], m, n, alpha, (const float **)&a[0], lda, &b[0], ldb, group_count, group_size, {}); device_queue->val.wait_and_throw(); return 0; } extern "C" int onemklDtrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose transa, onemklDiag unit_diag, int64_t *m, int64_t *n, 
double *alpha, const double **a, int64_t *lda, double **b, int64_t *ldb, int64_t group_count, int64_t *group_size) { trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa, unit_diag, group_count); device_queue->val.wait_and_throw(); auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0], &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0], m, n, alpha, (const double **)&a[0], lda, &b[0], ldb, group_count, group_size, {}); device_queue->val.wait_and_throw(); return 0; } extern "C" int onemklCtrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose transa, onemklDiag unit_diag, int64_t *m, int64_t *n, float _Complex *alpha, const float _Complex **a, int64_t *lda, float _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_size) { trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa, unit_diag, group_count); device_queue->val.wait_and_throw(); auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0], &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0], m, n, reinterpret_cast *>(alpha), reinterpret_cast **>(&a[0]), lda, reinterpret_cast **>(&b[0]), ldb, group_count, group_size, {}); device_queue->val.wait_and_throw(); return 0; } extern "C" int onemklZtrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose transa, onemklDiag unit_diag, int64_t *m, int64_t *n, double _Complex *alpha, const double _Complex **a, int64_t *lda, double _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_size) { trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa, unit_diag, group_count); device_queue->val.wait_and_throw(); auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0], &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0], m, n, 
reinterpret_cast *>(alpha), reinterpret_cast **>(&a[0]), lda, reinterpret_cast **>(&b[0]), ldb, group_count, group_size, {}); device_queue->val.wait_and_throw(); return 0; } // BLAS extern "C" int onemklHgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, short *alpha, short *a, int64_t lda, short *b, int64_t ldb, short *beta, short *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::gemm(device_queue->val, convert(transa), convert(transb), m, n, k, *reinterpret_cast(alpha), reinterpret_cast(a), lda, reinterpret_cast(b), ldb, *reinterpret_cast(beta), reinterpret_cast(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, float *alpha, float *a, int64_t lda, float *b, int64_t ldb, float *beta, float *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::gemm(device_queue->val, convert(transa), convert(transb), m, n, k, *alpha, a, lda, b, ldb, *beta, c, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, double *alpha, double *a, int64_t lda, double *b, int64_t ldb, double *beta, double *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::gemm(device_queue->val, convert(transa), convert(transb), m, n, k, *alpha, a, lda, b, ldb, *beta, c, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex 
*beta, float _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::gemm(device_queue->val, convert(transa), convert(transb), m, n, k, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, *reinterpret_cast*>(beta), reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *beta, double _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::gemm(device_queue->val, convert(transa), convert(transb), m, n, k, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, *reinterpret_cast*>(beta), reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsymm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *b, int64_t ldb, float *beta, float *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::symm(device_queue->val, convert(left_right), convert(upper_lower), m, n, *alpha, a, lda, b, ldb, *beta, c, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsymm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *b, int64_t ldb, double *beta, double *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::symm(device_queue->val, convert(left_right), convert(upper_lower), m, n, *alpha, a, lda, b, ldb, *beta, c, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& 
e) { return -1; } return 0; } extern "C" int onemklCsymm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *beta, float _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::symm(device_queue->val, convert(left_right), convert(upper_lower), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, *reinterpret_cast*>(beta), reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsymm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *beta, double _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::symm(device_queue->val, convert(left_right), convert(upper_lower), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, *reinterpret_cast*>(beta), reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklChemm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *beta, float _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::hemm(device_queue->val, convert(left_right), convert(upper_lower), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, *reinterpret_cast*>(beta), reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZhemm(syclQueue_t device_queue, onemklSide 
left_right, onemklUplo upper_lower, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *beta, double _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::hemm(device_queue->val, convert(left_right), convert(upper_lower), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, *reinterpret_cast*>(beta), reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float *alpha, float *a, int64_t lda, float *beta, float *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::syrk(device_queue->val, convert(upper_lower), convert(trans), n, k, *alpha, a, lda, *beta, c, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double *alpha, double *a, int64_t lda, double *beta, double *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::syrk(device_queue->val, convert(upper_lower), convert(trans), n, k, *alpha, a, lda, *beta, c, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *beta, float _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::syrk(device_queue->val, convert(upper_lower), convert(trans), n, k, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, *reinterpret_cast*>(beta), reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch 
(const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *beta, double _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::syrk(device_queue->val, convert(upper_lower), convert(trans), n, k, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, *reinterpret_cast*>(beta), reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCherk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float *alpha, float _Complex *a, int64_t lda, float *beta, float _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::herk(device_queue->val, convert(upper_lower), convert(trans), n, k, *alpha, reinterpret_cast*>(a), lda, *beta, reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZherk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double *alpha, double _Complex *a, int64_t lda, double *beta, double _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::herk(device_queue->val, convert(upper_lower), convert(trans), n, k, *alpha, reinterpret_cast*>(a), lda, *beta, reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float *alpha, float *a, int64_t lda, float *b, int64_t ldb, float *beta, float *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::syr2k(device_queue->val, convert(upper_lower), convert(trans), n, k, 
*alpha, a, lda, b, ldb, *beta, c, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double *alpha, double *a, int64_t lda, double *b, int64_t ldb, double *beta, double *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::syr2k(device_queue->val, convert(upper_lower), convert(trans), n, k, *alpha, a, lda, b, ldb, *beta, c, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *beta, float _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::syr2k(device_queue->val, convert(upper_lower), convert(trans), n, k, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, *reinterpret_cast*>(beta), reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *beta, double _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::syr2k(device_queue->val, convert(upper_lower), convert(trans), n, k, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, *reinterpret_cast*>(beta), reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCher2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, 
int64_t n, int64_t k, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float *beta, float _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::her2k(device_queue->val, convert(upper_lower), convert(trans), n, k, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, *beta, reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZher2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double *beta, double _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::her2k(device_queue->val, convert(upper_lower), convert(trans), n, k, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, *beta, reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklStrmm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *b, int64_t ldb) { try { auto status = oneapi::mkl::blas::column_major::trmm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, *alpha, a, lda, b, ldb, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDtrmm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *b, int64_t ldb) { try { auto status = oneapi::mkl::blas::column_major::trmm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), 
convert(unit_diag), m, n, *alpha, a, lda, b, ldb, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCtrmm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb) { try { auto status = oneapi::mkl::blas::column_major::trmm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZtrmm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb) { try { auto status = oneapi::mkl::blas::column_major::trmm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklStrmm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *b, int64_t ldb, float *beta, float *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::trmm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, *alpha, a, lda, b, ldb, *beta, c, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDtrmm_variant(syclQueue_t 
device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *b, int64_t ldb, double *beta, double *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::trmm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, *alpha, a, lda, b, ldb, *beta, c, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCtrmm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *beta, float _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::trmm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, *reinterpret_cast*>(beta), reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZtrmm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *beta, double _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::trmm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, *reinterpret_cast*>(beta), reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int 
onemklStrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *b, int64_t ldb) { try { auto status = oneapi::mkl::blas::column_major::trsm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, *alpha, a, lda, b, ldb, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDtrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *b, int64_t ldb) { try { auto status = oneapi::mkl::blas::column_major::trsm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, *alpha, a, lda, b, ldb, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCtrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb) { try { auto status = oneapi::mkl::blas::column_major::trsm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZtrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb) { try { auto status = oneapi::mkl::blas::column_major::trsm(device_queue->val, convert(left_right), 
convert(upper_lower), convert(trans), convert(unit_diag), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklStrsm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *b, int64_t ldb, float *beta, float *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::trsm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, *alpha, a, lda, b, ldb, *beta, c, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDtrsm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *b, int64_t ldb, double *beta, double *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::trsm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, *alpha, a, lda, b, ldb, *beta, c, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCtrsm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *beta, float _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::trsm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, 
*reinterpret_cast*>(beta), reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZtrsm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *beta, double _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::trsm(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, *reinterpret_cast*>(beta), reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, float *a, int64_t lda, float *x, int64_t incx, float *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::dgmm(device_queue->val, convert(left_right), m, n, a, lda, x, incx, c, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, double *a, int64_t lda, double *x, int64_t incx, double *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::dgmm(device_queue->val, convert(left_right), m, n, a, lda, x, incx, c, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::dgmm(device_queue->val, convert(left_right), m, n, 
reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::dgmm(device_queue->val, convert(left_right), m, n, reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *x, int64_t incx, float *beta, float *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::gemv(device_queue->val, convert(trans), m, n, *alpha, a, lda, x, incx, *beta, y, incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *x, int64_t incx, double *beta, double *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::gemv(device_queue->val, convert(trans), m, n, *alpha, a, lda, x, incx, *beta, y, incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex *beta, float _Complex *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::gemv(device_queue->val, convert(trans), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, 
*reinterpret_cast*>(beta), reinterpret_cast*>(y), incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex *beta, double _Complex *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::gemv(device_queue->val, convert(trans), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, *reinterpret_cast*>(beta), reinterpret_cast*>(y), incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, float *alpha, float *a, int64_t lda, float *x, int64_t incx, float *beta, float *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::gbmv(device_queue->val, convert(trans), m, n, kl, ku, *alpha, a, lda, x, incx, *beta, y, incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, double *alpha, double *a, int64_t lda, double *x, int64_t incx, double *beta, double *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::gbmv(device_queue->val, convert(trans), m, n, kl, ku, *alpha, a, lda, x, incx, *beta, y, incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex *beta, float _Complex *y, int64_t incy) { try { auto status = 
oneapi::mkl::blas::column_major::gbmv(device_queue->val, convert(trans), m, n, kl, ku, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, *reinterpret_cast*>(beta), reinterpret_cast*>(y), incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex *beta, double _Complex *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::gbmv(device_queue->val, convert(trans), m, n, kl, ku, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, *reinterpret_cast*>(beta), reinterpret_cast*>(y), incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSger(syclQueue_t device_queue, int64_t m, int64_t n, float *alpha, float *x, int64_t incx, float *y, int64_t incy, float *a, int64_t lda) { try { auto status = oneapi::mkl::blas::column_major::ger(device_queue->val, m, n, *alpha, x, incx, y, incy, a, lda, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDger(syclQueue_t device_queue, int64_t m, int64_t n, double *alpha, double *x, int64_t incx, double *y, int64_t incy, double *a, int64_t lda) { try { auto status = oneapi::mkl::blas::column_major::ger(device_queue->val, m, n, *alpha, x, incx, y, incy, a, lda, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgerc(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a, int64_t lda) { try { auto status = oneapi::mkl::blas::column_major::gerc(device_queue->val, m, n, 
*reinterpret_cast*>(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(a), lda, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgerc(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a, int64_t lda) { try { auto status = oneapi::mkl::blas::column_major::gerc(device_queue->val, m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(a), lda, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgeru(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a, int64_t lda) { try { auto status = oneapi::mkl::blas::column_major::geru(device_queue->val, m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(a), lda, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgeru(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a, int64_t lda) { try { auto status = oneapi::mkl::blas::column_major::geru(device_queue->val, m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(a), lda, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklChbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, int64_t k, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex *beta, float _Complex *y, int64_t incy) { try { auto status = 
oneapi::mkl::blas::column_major::hbmv(device_queue->val, convert(upper_lower), n, k, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, *reinterpret_cast*>(beta), reinterpret_cast*>(y), incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZhbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, int64_t k, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex *beta, double _Complex *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::hbmv(device_queue->val, convert(upper_lower), n, k, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, *reinterpret_cast*>(beta), reinterpret_cast*>(y), incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklChemv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex *beta, float _Complex *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::hemv(device_queue->val, convert(upper_lower), n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, *reinterpret_cast*>(beta), reinterpret_cast*>(y), incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZhemv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex *beta, double _Complex *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::hemv(device_queue->val, convert(upper_lower), n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, *reinterpret_cast*>(beta), reinterpret_cast*>(y), incy, {}); 
device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCher(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float _Complex *x, int64_t incx, float _Complex *a, int64_t lda) { try { auto status = oneapi::mkl::blas::column_major::her(device_queue->val, convert(upper_lower), n, *alpha, reinterpret_cast*>(x), incx, reinterpret_cast*>(a), lda, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZher(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double _Complex *x, int64_t incx, double _Complex *a, int64_t lda) { try { auto status = oneapi::mkl::blas::column_major::her(device_queue->val, convert(upper_lower), n, *alpha, reinterpret_cast*>(x), incx, reinterpret_cast*>(a), lda, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCher2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a, int64_t lda) { try { auto status = oneapi::mkl::blas::column_major::her2(device_queue->val, convert(upper_lower), n, *reinterpret_cast*>(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(a), lda, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZher2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a, int64_t lda) { try { auto status = oneapi::mkl::blas::column_major::her2(device_queue->val, convert(upper_lower), n, *reinterpret_cast*>(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(a), lda, {}); device_queue->val.wait_and_throw(); } catch (const 
sycl::exception& e) { return -1; } return 0; } extern "C" int onemklChpmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex *alpha, float _Complex *a, float _Complex *x, int64_t incx, float _Complex *beta, float _Complex *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::hpmv(device_queue->val, convert(upper_lower), n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), reinterpret_cast*>(x), incx, *reinterpret_cast*>(beta), reinterpret_cast*>(y), incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZhpmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex *alpha, double _Complex *a, double _Complex *x, int64_t incx, double _Complex *beta, double _Complex *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::hpmv(device_queue->val, convert(upper_lower), n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), reinterpret_cast*>(x), incx, *reinterpret_cast*>(beta), reinterpret_cast*>(y), incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklChpr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float _Complex *x, int64_t incx, float _Complex *a) { try { auto status = oneapi::mkl::blas::column_major::hpr(device_queue->val, convert(upper_lower), n, *alpha, reinterpret_cast*>(x), incx, reinterpret_cast*>(a), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZhpr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double _Complex *x, int64_t incx, double _Complex *a) { try { auto status = oneapi::mkl::blas::column_major::hpr(device_queue->val, convert(upper_lower), n, *alpha, reinterpret_cast*>(x), incx, reinterpret_cast*>(a), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return 
-1; } return 0; } extern "C" int onemklChpr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a) { try { auto status = oneapi::mkl::blas::column_major::hpr2(device_queue->val, convert(upper_lower), n, *reinterpret_cast*>(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(a), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZhpr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a) { try { auto status = oneapi::mkl::blas::column_major::hpr2(device_queue->val, convert(upper_lower), n, *reinterpret_cast*>(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(a), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, int64_t k, float *alpha, float *a, int64_t lda, float *x, int64_t incx, float *beta, float *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::sbmv(device_queue->val, convert(upper_lower), n, k, *alpha, a, lda, x, incx, *beta, y, incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, int64_t k, double *alpha, double *a, int64_t lda, double *x, int64_t incx, double *beta, double *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::sbmv(device_queue->val, convert(upper_lower), n, k, *alpha, a, lda, x, incx, *beta, y, incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsymv(syclQueue_t device_queue, 
onemklUplo upper_lower, int64_t n, float *alpha, float *a, int64_t lda, float *x, int64_t incx, float *beta, float *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::symv(device_queue->val, convert(upper_lower), n, *alpha, a, lda, x, incx, *beta, y, incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double *a, int64_t lda, double *x, int64_t incx, double *beta, double *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::symv(device_queue->val, convert(upper_lower), n, *alpha, a, lda, x, incx, *beta, y, incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex *beta, float _Complex *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::symv(device_queue->val, convert(upper_lower), n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, *reinterpret_cast*>(beta), reinterpret_cast*>(y), incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex *beta, double _Complex *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::symv(device_queue->val, convert(upper_lower), n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, *reinterpret_cast*>(beta), reinterpret_cast*>(y), incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int 
onemklSsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float *x, int64_t incx, float *a, int64_t lda) { try { auto status = oneapi::mkl::blas::column_major::syr(device_queue->val, convert(upper_lower), n, *alpha, x, incx, a, lda, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double *x, int64_t incx, double *a, int64_t lda) { try { auto status = oneapi::mkl::blas::column_major::syr(device_queue->val, convert(upper_lower), n, *alpha, x, incx, a, lda, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *a, int64_t lda) { try { auto status = oneapi::mkl::blas::column_major::syr(device_queue->val, convert(upper_lower), n, *reinterpret_cast*>(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(a), lda, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *a, int64_t lda) { try { auto status = oneapi::mkl::blas::column_major::syr(device_queue->val, convert(upper_lower), n, *reinterpret_cast*>(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(a), lda, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float *x, int64_t incx, float *y, int64_t incy, float *a, int64_t lda) { try { auto status = oneapi::mkl::blas::column_major::syr2(device_queue->val, convert(upper_lower), n, *alpha, x, incx, y, incy, a, lda, {}); 
device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double *x, int64_t incx, double *y, int64_t incy, double *a, int64_t lda) { try { auto status = oneapi::mkl::blas::column_major::syr2(device_queue->val, convert(upper_lower), n, *alpha, x, incx, y, incy, a, lda, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a, int64_t lda) { try { auto status = oneapi::mkl::blas::column_major::syr2(device_queue->val, convert(upper_lower), n, *reinterpret_cast*>(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(a), lda, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a, int64_t lda) { try { auto status = oneapi::mkl::blas::column_major::syr2(device_queue->val, convert(upper_lower), n, *reinterpret_cast*>(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(a), lda, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSspmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float *a, float *x, int64_t incx, float *beta, float *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::spmv(device_queue->val, convert(upper_lower), n, *alpha, a, x, incx, *beta, y, incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } 
extern "C" int onemklDspmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double *a, double *x, int64_t incx, double *beta, double *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::spmv(device_queue->val, convert(upper_lower), n, *alpha, a, x, incx, *beta, y, incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSspr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float *x, int64_t incx, float *a) { try { auto status = oneapi::mkl::blas::column_major::spr(device_queue->val, convert(upper_lower), n, *alpha, x, incx, a, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDspr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double *x, int64_t incx, double *a) { try { auto status = oneapi::mkl::blas::column_major::spr(device_queue->val, convert(upper_lower), n, *alpha, x, incx, a, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSspr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float *x, int64_t incx, float *y, int64_t incy, float *a) { try { auto status = oneapi::mkl::blas::column_major::spr2(device_queue->val, convert(upper_lower), n, *alpha, x, incx, y, incy, a, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDspr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double *x, int64_t incx, double *y, int64_t incy, double *a) { try { auto status = oneapi::mkl::blas::column_major::spr2(device_queue->val, convert(upper_lower), n, *alpha, x, incx, y, incy, a, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklStbmv(syclQueue_t device_queue, 
onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, float *a, int64_t lda, float *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::tbmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, k, a, lda, x, incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDtbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, double *a, int64_t lda, double *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::tbmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, k, a, lda, x, incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCtbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::tbmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZtbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::tbmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklStbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag 
unit_diag, int64_t n, int64_t k, float *a, int64_t lda, float *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::tbsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, k, a, lda, x, incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDtbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, double *a, int64_t lda, double *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::tbsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, k, a, lda, x, incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCtbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::tbsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZtbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::tbsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklStpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float *a, float *x, int64_t incx) { try { 
auto status = oneapi::mkl::blas::column_major::tpmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, a, x, incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDtpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double *a, double *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::tpmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, a, x, incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCtpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float _Complex *a, float _Complex *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::tpmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, reinterpret_cast*>(a), reinterpret_cast*>(x), incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZtpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double _Complex *a, double _Complex *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::tpmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, reinterpret_cast*>(a), reinterpret_cast*>(x), incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklStpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float *a, float *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::tpsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, a, x, incx, {}); device_queue->val.wait_and_throw(); } 
catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDtpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double *a, double *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::tpsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, a, x, incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCtpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float _Complex *a, float _Complex *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::tpsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, reinterpret_cast*>(a), reinterpret_cast*>(x), incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZtpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double _Complex *a, double _Complex *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::tpsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, reinterpret_cast*>(a), reinterpret_cast*>(x), incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklStrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float *a, int64_t lda, float *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::trmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, a, lda, x, incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDtrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag 
unit_diag, int64_t n, double *a, int64_t lda, double *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::trmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, a, lda, x, incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCtrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::trmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZtrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::trmv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklStrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float *a, int64_t lda, float *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::trsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, a, lda, x, incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDtrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double *a, int64_t lda, double *x, int64_t incx) { try { auto status = 
oneapi::mkl::blas::column_major::trsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, a, lda, x, incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCtrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::trsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZtrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::trsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(x), incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCdotc(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *result) { try { auto status = oneapi::mkl::blas::column_major::dotc(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(result), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZdotc(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *result) { try { auto status = oneapi::mkl::blas::column_major::dotc(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(result), {}); 
device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCdotu(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *result) { try { auto status = oneapi::mkl::blas::column_major::dotu(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(result), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZdotu(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *result) { try { auto status = oneapi::mkl::blas::column_major::dotu(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, reinterpret_cast*>(result), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSiamax(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, int32_t *result, onemklIndex base) { try { auto status = oneapi::mkl::blas::column_major::iamax(device_queue->val, n, x, incx, result, convert(base), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSiamax_64(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, int64_t *result, onemklIndex base) { try { auto status = oneapi::mkl::blas::column_major::iamax(device_queue->val, n, x, incx, result, convert(base), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDiamax(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, int32_t *result, onemklIndex base) { try { auto status = oneapi::mkl::blas::column_major::iamax(device_queue->val, n, x, incx, result, convert(base), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int 
onemklDiamax_64(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, int64_t *result, onemklIndex base) { try { auto status = oneapi::mkl::blas::column_major::iamax(device_queue->val, n, x, incx, result, convert(base), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCiamax(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, int32_t *result, onemklIndex base) { try { auto status = oneapi::mkl::blas::column_major::iamax(device_queue->val, n, reinterpret_cast*>(x), incx, result, convert(base), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCiamax_64(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, int64_t *result, onemklIndex base) { try { auto status = oneapi::mkl::blas::column_major::iamax(device_queue->val, n, reinterpret_cast*>(x), incx, result, convert(base), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZiamax(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, int32_t *result, onemklIndex base) { try { auto status = oneapi::mkl::blas::column_major::iamax(device_queue->val, n, reinterpret_cast*>(x), incx, result, convert(base), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZiamax_64(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, int64_t *result, onemklIndex base) { try { auto status = oneapi::mkl::blas::column_major::iamax(device_queue->val, n, reinterpret_cast*>(x), incx, result, convert(base), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSiamin(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, int32_t *result, onemklIndex base) { try { auto status = 
oneapi::mkl::blas::column_major::iamin(device_queue->val, n, x, incx, result, convert(base), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSiamin_64(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, int64_t *result, onemklIndex base) { try { auto status = oneapi::mkl::blas::column_major::iamin(device_queue->val, n, x, incx, result, convert(base), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDiamin(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, int32_t *result, onemklIndex base) { try { auto status = oneapi::mkl::blas::column_major::iamin(device_queue->val, n, x, incx, result, convert(base), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDiamin_64(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, int64_t *result, onemklIndex base) { try { auto status = oneapi::mkl::blas::column_major::iamin(device_queue->val, n, x, incx, result, convert(base), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCiamin(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, int32_t *result, onemklIndex base) { try { auto status = oneapi::mkl::blas::column_major::iamin(device_queue->val, n, reinterpret_cast*>(x), incx, result, convert(base), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCiamin_64(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, int64_t *result, onemklIndex base) { try { auto status = oneapi::mkl::blas::column_major::iamin(device_queue->val, n, reinterpret_cast*>(x), incx, result, convert(base), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int 
onemklZiamin(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, int32_t *result, onemklIndex base) { try { auto status = oneapi::mkl::blas::column_major::iamin(device_queue->val, n, reinterpret_cast*>(x), incx, result, convert(base), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZiamin_64(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, int64_t *result, onemklIndex base) { try { auto status = oneapi::mkl::blas::column_major::iamin(device_queue->val, n, reinterpret_cast*>(x), incx, result, convert(base), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSasum(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *result) { try { auto status = oneapi::mkl::blas::column_major::asum(device_queue->val, n, x, incx, result, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDasum(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *result) { try { auto status = oneapi::mkl::blas::column_major::asum(device_queue->val, n, x, incx, result, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCasum(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float *result) { try { auto status = oneapi::mkl::blas::column_major::asum(device_queue->val, n, reinterpret_cast*>(x), incx, result, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZasum(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double *result) { try { auto status = oneapi::mkl::blas::column_major::asum(device_queue->val, n, reinterpret_cast*>(x), incx, result, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; 
} return 0; } extern "C" int onemklHaxpy(syclQueue_t device_queue, int64_t n, short *alpha, short *x, int64_t incx, short *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::axpy(device_queue->val, n, *reinterpret_cast(alpha), reinterpret_cast(x), incx, reinterpret_cast(y), incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSaxpy(syclQueue_t device_queue, int64_t n, float *alpha, float *x, int64_t incx, float *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::axpy(device_queue->val, n, *alpha, x, incx, y, incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDaxpy(syclQueue_t device_queue, int64_t n, double *alpha, double *x, int64_t incx, double *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::axpy(device_queue->val, n, *alpha, x, incx, y, incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCaxpy(syclQueue_t device_queue, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::axpy(device_queue->val, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZaxpy(syclQueue_t device_queue, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::axpy(device_queue->val, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSaxpby(syclQueue_t device_queue, int64_t n, 
float *alpha, float *x, int64_t incx, float *beta, float *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::axpby(device_queue->val, n, *alpha, x, incx, *beta, y, incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDaxpby(syclQueue_t device_queue, int64_t n, double *alpha, double *x, int64_t incx, double *beta, double *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::axpby(device_queue->val, n, *alpha, x, incx, *beta, y, incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCaxpby(syclQueue_t device_queue, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *beta, float _Complex *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::axpby(device_queue->val, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(x), incx, *reinterpret_cast*>(beta), reinterpret_cast*>(y), incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZaxpby(syclQueue_t device_queue, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *beta, double _Complex *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::axpby(device_queue->val, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(x), incx, *reinterpret_cast*>(beta), reinterpret_cast*>(y), incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklScopy(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::copy(device_queue->val, n, x, incx, y, incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDcopy(syclQueue_t device_queue, int64_t n, double 
*x, int64_t incx, double *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::copy(device_queue->val, n, x, incx, y, incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCcopy(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::copy(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZcopy(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::copy(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklHdot(syclQueue_t device_queue, int64_t n, short *x, int64_t incx, short *y, int64_t incy, short *result) { try { auto status = oneapi::mkl::blas::column_major::dot(device_queue->val, n, reinterpret_cast(x), incx, reinterpret_cast(y), incy, reinterpret_cast(result), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSdot(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, float *result) { try { auto status = oneapi::mkl::blas::column_major::dot(device_queue->val, n, x, incx, y, incy, result, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDdot(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, double *result) { try { auto status = oneapi::mkl::blas::column_major::dot(device_queue->val, n, x, incx, y, incy, result, {}); device_queue->val.wait_and_throw(); } 
catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsdsdot(syclQueue_t device_queue, int64_t n, float *sb, float *x, int64_t incx, float *y, int64_t incy, float *result) { try { auto status = oneapi::mkl::blas::column_major::sdsdot(device_queue->val, n, *sb, x, incx, y, incy, result, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklHnrm2(syclQueue_t device_queue, int64_t n, short *x, int64_t incx, short *result) { try { auto status = oneapi::mkl::blas::column_major::nrm2(device_queue->val, n, reinterpret_cast(x), incx, reinterpret_cast(result), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSnrm2(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *result) { try { auto status = oneapi::mkl::blas::column_major::nrm2(device_queue->val, n, x, incx, result, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDnrm2(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *result) { try { auto status = oneapi::mkl::blas::column_major::nrm2(device_queue->val, n, x, incx, result, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCnrm2(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float *result) { try { auto status = oneapi::mkl::blas::column_major::nrm2(device_queue->val, n, reinterpret_cast*>(x), incx, result, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZnrm2(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double *result) { try { auto status = oneapi::mkl::blas::column_major::nrm2(device_queue->val, n, reinterpret_cast*>(x), incx, result, {}); device_queue->val.wait_and_throw(); } catch (const 
sycl::exception& e) { return -1; } return 0; } extern "C" int onemklHrot(syclQueue_t device_queue, int64_t n, short *x, int64_t incx, short *y, int64_t incy, short *c, short *s) { try { auto status = oneapi::mkl::blas::column_major::rot(device_queue->val, n, reinterpret_cast(x), incx, reinterpret_cast(y), incy, *reinterpret_cast(c), *reinterpret_cast(s), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSrot(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, float *c, float *s) { try { auto status = oneapi::mkl::blas::column_major::rot(device_queue->val, n, x, incx, y, incy, *c, *s, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDrot(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, double *c, double *s) { try { auto status = oneapi::mkl::blas::column_major::rot(device_queue->val, n, x, incx, y, incy, *c, *s, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCSrot(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float *c, float *s) { try { auto status = oneapi::mkl::blas::column_major::rot(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, *c, *s, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCrot(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float *c, float _Complex *s) { try { auto status = oneapi::mkl::blas::column_major::rot(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, *c, *reinterpret_cast*>(s), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int 
onemklZDrot(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double *c, double *s) { try { auto status = oneapi::mkl::blas::column_major::rot(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, *c, *s, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZrot(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double *c, double _Complex *s) { try { auto status = oneapi::mkl::blas::column_major::rot(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, *c, *reinterpret_cast*>(s), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSrotg(syclQueue_t device_queue, float *a, float *b, float *c, float *s) { try { auto status = oneapi::mkl::blas::column_major::rotg(device_queue->val, a, b, c, s, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDrotg(syclQueue_t device_queue, double *a, double *b, double *c, double *s) { try { auto status = oneapi::mkl::blas::column_major::rotg(device_queue->val, a, b, c, s, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCrotg(syclQueue_t device_queue, float _Complex *a, float _Complex *b, float *c, float _Complex *s) { try { auto status = oneapi::mkl::blas::column_major::rotg(device_queue->val, reinterpret_cast*>(a), reinterpret_cast*>(b), c, reinterpret_cast*>(s), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZrotg(syclQueue_t device_queue, double _Complex *a, double _Complex *b, double *c, double _Complex *s) { try { auto status = oneapi::mkl::blas::column_major::rotg(device_queue->val, reinterpret_cast*>(a), 
reinterpret_cast*>(b), c, reinterpret_cast*>(s), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSrotm(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, float *param) { try { auto status = oneapi::mkl::blas::column_major::rotm(device_queue->val, n, x, incx, y, incy, param, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDrotm(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, double *param) { try { auto status = oneapi::mkl::blas::column_major::rotm(device_queue->val, n, x, incx, y, incy, param, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSrotmg(syclQueue_t device_queue, float *d1, float *d2, float *x1, float *y1, float *param) { try { auto status = oneapi::mkl::blas::column_major::rotmg(device_queue->val, d1, d2, x1, *y1, param, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDrotmg(syclQueue_t device_queue, double *d1, double *d2, double *x1, double *y1, double *param) { try { auto status = oneapi::mkl::blas::column_major::rotmg(device_queue->val, d1, d2, x1, *y1, param, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklHscal(syclQueue_t device_queue, int64_t n, short *alpha, short *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, *reinterpret_cast(alpha), reinterpret_cast(x), incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSscal(syclQueue_t device_queue, int64_t n, float *alpha, float *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, 
*alpha, x, incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDscal(syclQueue_t device_queue, int64_t n, double *alpha, double *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, *alpha, x, incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCSscal(syclQueue_t device_queue, int64_t n, float *alpha, float _Complex *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, *alpha, reinterpret_cast*>(x), incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZDscal(syclQueue_t device_queue, int64_t n, double *alpha, double _Complex *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, *alpha, reinterpret_cast*>(x), incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCscal(syclQueue_t device_queue, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(x), incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZscal(syclQueue_t device_queue, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx) { try { auto status = oneapi::mkl::blas::column_major::scal(device_queue->val, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(x), incx, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSswap(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy) { try { auto status = 
oneapi::mkl::blas::column_major::swap(device_queue->val, n, x, incx, y, incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDswap(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::swap(device_queue->val, n, x, incx, y, incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCswap(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::swap(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZswap(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy) { try { auto status = oneapi::mkl::blas::column_major::swap(device_queue->val, n, reinterpret_cast*>(x), incx, reinterpret_cast*>(y), incy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklHgemm_batch_strided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, short *alpha, short *a, int64_t lda, int64_t stride_a, short *b, int64_t ldb, int64_t stride_b, short *beta, short *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, convert(transa), convert(transb), m, n, k, *reinterpret_cast(alpha), reinterpret_cast(a), lda, stride_a, reinterpret_cast(b), ldb, stride_b, *reinterpret_cast(beta), reinterpret_cast(c), ldc, stride_c, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int 
onemklSgemm_batch_strided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, float *alpha, float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, float *beta, float *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, convert(transa), convert(transb), m, n, k, *alpha, a, lda, stride_a, b, ldb, stride_b, *beta, c, ldc, stride_c, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgemm_batch_strided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, double *alpha, double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, double *beta, double *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, convert(transa), convert(transb), m, n, k, *alpha, a, lda, stride_a, b, ldb, stride_b, *beta, c, ldc, stride_c, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgemm_batch_strided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, float _Complex *alpha, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *b, int64_t ldb, int64_t stride_b, float _Complex *beta, float _Complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, convert(transa), convert(transb), m, n, k, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(b), ldb, stride_b, *reinterpret_cast*>(beta), reinterpret_cast*>(c), ldc, stride_c, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { 
return -1; } return 0; } extern "C" int onemklZgemm_batch_strided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, double _Complex *alpha, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *b, int64_t ldb, int64_t stride_b, double _Complex *beta, double _Complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val, convert(transa), convert(transb), m, n, k, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(b), ldb, stride_b, *reinterpret_cast*>(beta), reinterpret_cast*>(c), ldc, stride_c, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsyrk_batch_strided(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float *alpha, float *a, int64_t lda, int64_t stride_a, float *beta, float *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::syrk_batch(device_queue->val, convert(upper_lower), convert(trans), n, k, *alpha, a, lda, stride_a, *beta, c, ldc, stride_c, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsyrk_batch_strided(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double *alpha, double *a, int64_t lda, int64_t stride_a, double *beta, double *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::syrk_batch(device_queue->val, convert(upper_lower), convert(trans), n, k, *alpha, a, lda, stride_a, *beta, c, ldc, stride_c, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCsyrk_batch_strided(syclQueue_t device_queue, 
onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float _Complex *alpha, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *beta, float _Complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::syrk_batch(device_queue->val, convert(upper_lower), convert(trans), n, k, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, stride_a, *reinterpret_cast*>(beta), reinterpret_cast*>(c), ldc, stride_c, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsyrk_batch_strided(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double _Complex *alpha, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *beta, double _Complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::syrk_batch(device_queue->val, convert(upper_lower), convert(trans), n, k, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, stride_a, *reinterpret_cast*>(beta), reinterpret_cast*>(c), ldc, stride_c, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklStrsm_batch_strided(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, *alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDtrsm_batch_strided(syclQueue_t device_queue, onemklSide 
left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, *alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCtrsm_batch_strided(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(b), ldb, stride_b, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZtrsm_batch_strided(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val, convert(left_right), convert(upper_lower), convert(trans), convert(unit_diag), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(b), ldb, stride_b, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int 
onemklSgemv_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, int64_t stridea, float *x, int64_t incx, int64_t stridex, float *beta, float *y, int64_t incy, int64_t stridey, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::gemv_batch(device_queue->val, convert(trans), m, n, *alpha, a, lda, stridea, x, incx, stridex, *beta, y, incy, stridey, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgemv_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, int64_t stridea, double *x, int64_t incx, int64_t stridex, double *beta, double *y, int64_t incy, int64_t stridey, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::gemv_batch(device_queue->val, convert(trans), m, n, *alpha, a, lda, stridea, x, incx, stridex, *beta, y, incy, stridey, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgemv_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, int64_t stridea, float _Complex *x, int64_t incx, int64_t stridex, float _Complex *beta, float _Complex *y, int64_t incy, int64_t stridey, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::gemv_batch(device_queue->val, convert(trans), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, stridea, reinterpret_cast*>(x), incx, stridex, *reinterpret_cast*>(beta), reinterpret_cast*>(y), incy, stridey, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgemv_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double _Complex *alpha, double _Complex 
*a, int64_t lda, int64_t stridea, double _Complex *x, int64_t incx, int64_t stridex, double _Complex *beta, double _Complex *y, int64_t incy, int64_t stridey, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::gemv_batch(device_queue->val, convert(trans), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, stridea, reinterpret_cast*>(x), incx, stridex, *reinterpret_cast*>(beta), reinterpret_cast*>(y), incy, stridey, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSdgmm_batch_strided(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, float *a, int64_t lda, int64_t stridea, float *x, int64_t incx, int64_t stridex, float *c, int64_t ldc, int64_t stridec, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::dgmm_batch(device_queue->val, convert(left_right), m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDdgmm_batch_strided(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, double *a, int64_t lda, int64_t stridea, double *x, int64_t incx, int64_t stridex, double *c, int64_t ldc, int64_t stridec, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::dgmm_batch(device_queue->val, convert(left_right), m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCdgmm_batch_strided(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, float _Complex *a, int64_t lda, int64_t stridea, float _Complex *x, int64_t incx, int64_t stridex, float _Complex *c, int64_t ldc, int64_t stridec, int64_t batch_size) { try { auto status = 
oneapi::mkl::blas::column_major::dgmm_batch(device_queue->val, convert(left_right), m, n, reinterpret_cast*>(a), lda, stridea, reinterpret_cast*>(x), incx, stridex, reinterpret_cast*>(c), ldc, stridec, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZdgmm_batch_strided(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, double _Complex *a, int64_t lda, int64_t stridea, double _Complex *x, int64_t incx, int64_t stridex, double _Complex *c, int64_t ldc, int64_t stridec, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::dgmm_batch(device_queue->val, convert(left_right), m, n, reinterpret_cast*>(a), lda, stridea, reinterpret_cast*>(x), incx, stridex, reinterpret_cast*>(c), ldc, stridec, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSaxpy_batch_strided(syclQueue_t device_queue, int64_t n, float *alpha, float *x, int64_t incx, int64_t stridex, float *y, int64_t incy, int64_t stridey, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::axpy_batch(device_queue->val, n, *alpha, x, incx, stridex, y, incy, stridey, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDaxpy_batch_strided(syclQueue_t device_queue, int64_t n, double *alpha, double *x, int64_t incx, int64_t stridex, double *y, int64_t incy, int64_t stridey, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::axpy_batch(device_queue->val, n, *alpha, x, incx, stridex, y, incy, stridey, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCaxpy_batch_strided(syclQueue_t device_queue, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, int64_t stridex, float _Complex *y, 
int64_t incy, int64_t stridey, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::axpy_batch(device_queue->val, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(x), incx, stridex, reinterpret_cast*>(y), incy, stridey, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZaxpy_batch_strided(syclQueue_t device_queue, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, int64_t stridex, double _Complex *y, int64_t incy, int64_t stridey, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::axpy_batch(device_queue->val, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(x), incx, stridex, reinterpret_cast*>(y), incy, stridey, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklScopy_batch_strided(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, int64_t stridex, float *y, int64_t incy, int64_t stridey, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::copy_batch(device_queue->val, n, x, incx, stridex, y, incy, stridey, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDcopy_batch_strided(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, int64_t stridex, double *y, int64_t incy, int64_t stridey, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::copy_batch(device_queue->val, n, x, incx, stridex, y, incy, stridey, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCcopy_batch_strided(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, int64_t stridex, float _Complex *y, int64_t incy, int64_t stridey, int64_t batch_size) { try { auto status = 
oneapi::mkl::blas::column_major::copy_batch(device_queue->val, n, reinterpret_cast*>(x), incx, stridex, reinterpret_cast*>(y), incy, stridey, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZcopy_batch_strided(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, int64_t stridex, double _Complex *y, int64_t incy, int64_t stridey, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::copy_batch(device_queue->val, n, reinterpret_cast*>(x), incx, stridex, reinterpret_cast*>(y), incy, stridey, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgemmt(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose transa, onemklTranspose transb, int64_t n, int64_t k, float *alpha, float *a, int64_t lda, float *b, int64_t ldb, float *beta, float *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::gemmt(device_queue->val, convert(upper_lower), convert(transa), convert(transb), n, k, *alpha, a, lda, b, ldb, *beta, c, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgemmt(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose transa, onemklTranspose transb, int64_t n, int64_t k, double *alpha, double *a, int64_t lda, double *b, int64_t ldb, double *beta, double *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::gemmt(device_queue->val, convert(upper_lower), convert(transa), convert(transb), n, k, *alpha, a, lda, b, ldb, *beta, c, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgemmt(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose transa, onemklTranspose transb, int64_t n, int64_t k, float _Complex *alpha, float _Complex *a, int64_t lda, 
float _Complex *b, int64_t ldb, float _Complex *beta, float _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::gemmt(device_queue->val, convert(upper_lower), convert(transa), convert(transb), n, k, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, *reinterpret_cast*>(beta), reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgemmt(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose transa, onemklTranspose transb, int64_t n, int64_t k, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *beta, double _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::gemmt(device_queue->val, convert(upper_lower), convert(transa), convert(transb), n, k, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, *reinterpret_cast*>(beta), reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSimatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float *alpha, float *ab, int64_t lda, int64_t ldb) { try { auto status = oneapi::mkl::blas::column_major::imatcopy(device_queue->val, convert(trans), m, n, *alpha, ab, lda, ldb, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDimatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double *alpha, double *ab, int64_t lda, int64_t ldb) { try { auto status = oneapi::mkl::blas::column_major::imatcopy(device_queue->val, convert(trans), m, n, *alpha, ab, lda, ldb, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCimatcopy(syclQueue_t device_queue, onemklTranspose trans, 
int64_t m, int64_t n, float _Complex *alpha, float _Complex *ab, int64_t lda, int64_t ldb) { try { auto status = oneapi::mkl::blas::column_major::imatcopy(device_queue->val, convert(trans), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(ab), lda, ldb, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZimatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double _Complex *alpha, double _Complex *ab, int64_t lda, int64_t ldb) { try { auto status = oneapi::mkl::blas::column_major::imatcopy(device_queue->val, convert(trans), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(ab), lda, ldb, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSomatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *b, int64_t ldb) { try { auto status = oneapi::mkl::blas::column_major::omatcopy(device_queue->val, convert(trans), m, n, *alpha, a, lda, b, ldb, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDomatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *b, int64_t ldb) { try { auto status = oneapi::mkl::blas::column_major::omatcopy(device_queue->val, convert(trans), m, n, *alpha, a, lda, b, ldb, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklComatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb) { try { auto status = oneapi::mkl::blas::column_major::omatcopy(device_queue->val, convert(trans), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, {}); 
device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZomatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb) { try { auto status = oneapi::mkl::blas::column_major::omatcopy(device_queue->val, convert(trans), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSomatadd(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *beta, float *b, int64_t ldb, float *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::omatadd(device_queue->val, convert(transa), convert(transb), m, n, *alpha, a, lda, *beta, b, ldb, c, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDomatadd(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *beta, double *b, int64_t ldb, double *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::omatadd(device_queue->val, convert(transa), convert(transb), m, n, *alpha, a, lda, *beta, b, ldb, c, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklComatadd(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *beta, float _Complex *b, int64_t ldb, float _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::omatadd(device_queue->val, convert(transa), convert(transb), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), 
lda, *reinterpret_cast*>(beta), reinterpret_cast*>(b), ldb, reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZomatadd(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *beta, double _Complex *b, int64_t ldb, double _Complex *c, int64_t ldc) { try { auto status = oneapi::mkl::blas::column_major::omatadd(device_queue->val, convert(transa), convert(transb), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, *reinterpret_cast*>(beta), reinterpret_cast*>(b), ldb, reinterpret_cast*>(c), ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSimatcopy_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float *alpha, float *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::imatcopy_batch(device_queue->val, convert(trans), m, n, *alpha, ab, lda, ldb, stride, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDimatcopy_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double *alpha, double *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::imatcopy_batch(device_queue->val, convert(trans), m, n, *alpha, ab, lda, ldb, stride, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCimatcopy_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float _Complex *alpha, float _Complex *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { try { auto status = 
oneapi::mkl::blas::column_major::imatcopy_batch(device_queue->val, convert(trans), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(ab), lda, ldb, stride, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZimatcopy_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double _Complex *alpha, double _Complex *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::imatcopy_batch(device_queue->val, convert(trans), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(ab), lda, ldb, stride, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSomatcopy_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::omatcopy_batch(device_queue->val, convert(trans), m, n, *alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDomatcopy_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::omatcopy_batch(device_queue->val, convert(trans), m, n, *alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklComatcopy_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, int64_t stride_a, float 
_Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::omatcopy_batch(device_queue->val, convert(trans), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(b), ldb, stride_b, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZomatcopy_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::omatcopy_batch(device_queue->val, convert(trans), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(b), ldb, stride_b, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSomatadd_batch_strided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, int64_t stride_a, float *beta, float *b, int64_t ldb, int64_t stride_b, float *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::omatadd_batch(device_queue->val, convert(transa), convert(transb), m, n, *alpha, a, lda, stride_a, *beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDomatadd_batch_strided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, int64_t stride_a, double *beta, double *b, int64_t ldb, int64_t stride_b, double *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { try { auto status = 
oneapi::mkl::blas::column_major::omatadd_batch(device_queue->val, convert(transa), convert(transb), m, n, *alpha, a, lda, stride_a, *beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklComatadd_batch_strided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *beta, float _Complex *b, int64_t ldb, int64_t stride_b, float _Complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::omatadd_batch(device_queue->val, convert(transa), convert(transb), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, stride_a, *reinterpret_cast*>(beta), reinterpret_cast*>(b), ldb, stride_b, reinterpret_cast*>(c), ldc, stride_c, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZomatadd_batch_strided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *beta, double _Complex *b, int64_t ldb, int64_t stride_b, double _Complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size) { try { auto status = oneapi::mkl::blas::column_major::omatadd_batch(device_queue->val, convert(transa), convert(transb), m, n, *reinterpret_cast*>(alpha), reinterpret_cast*>(a), lda, stride_a, *reinterpret_cast*>(beta), reinterpret_cast*>(b), ldb, stride_b, reinterpret_cast*>(c), ldc, stride_c, batch_size, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } // LAPACK extern "C" int onemklSpotrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *scratchpad, int64_t scratchpad_size) { try { 
auto status = oneapi::mkl::lapack::potrf(device_queue->val, convert(uplo), n, a, lda, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDpotrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrf(device_queue->val, convert(uplo), n, a, lda, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCpotrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrf(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZpotrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrf(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSpotrs(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, float *a, int64_t lda, float *b, int64_t ldb, float *scratchpad, int64_t scratchpad_size) { try { auto status = 
oneapi::mkl::lapack::potrs(device_queue->val, convert(uplo), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDpotrs(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, double *a, int64_t lda, double *b, int64_t ldb, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrs(device_queue->val, convert(uplo), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCpotrs(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrs(device_queue->val, convert(uplo), n, nrhs, reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZpotrs(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrs(device_queue->val, convert(uplo), n, nrhs, reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int 
onemklSpotri(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potri(device_queue->val, convert(uplo), n, a, lda, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDpotri(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potri(device_queue->val, convert(uplo), n, a, lda, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCpotri(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potri(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZpotri(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potri(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklStrtri(syclQueue_t device_queue, onemklUplo uplo, onemklDiag 
diag, int64_t n, float *a, int64_t lda, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::trtri(device_queue->val, convert(uplo), convert(diag), n, a, lda, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDtrtri(syclQueue_t device_queue, onemklUplo uplo, onemklDiag diag, int64_t n, double *a, int64_t lda, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::trtri(device_queue->val, convert(uplo), convert(diag), n, a, lda, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCtrtri(syclQueue_t device_queue, onemklUplo uplo, onemklDiag diag, int64_t n, float _Complex *a, int64_t lda, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::trtri(device_queue->val, convert(uplo), convert(diag), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZtrtri(syclQueue_t device_queue, onemklUplo uplo, onemklDiag diag, int64_t n, double _Complex *a, int64_t lda, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::trtri(device_queue->val, convert(uplo), convert(diag), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int 
onemklSgesv(syclQueue_t device_queue, int64_t n, int64_t nrhs, float *a, int64_t lda, int64_t *ipiv, float *b, int64_t ldb, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gesv(device_queue->val, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgesv(syclQueue_t device_queue, int64_t n, int64_t nrhs, double *a, int64_t lda, int64_t *ipiv, double *b, int64_t ldb, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gesv(device_queue->val, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgesv(syclQueue_t device_queue, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, int64_t *ipiv, float _Complex *b, int64_t ldb, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gesv(device_queue->val, n, nrhs, reinterpret_cast*>(a), lda, ipiv, reinterpret_cast*>(b), ldb, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgesv(syclQueue_t device_queue, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, int64_t *ipiv, double _Complex *b, int64_t ldb, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gesv(device_queue->val, n, nrhs, reinterpret_cast*>(a), lda, ipiv, reinterpret_cast*>(b), ldb, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch 
(const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgebrd(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, float *d, float *e, float _Complex *tauq, float _Complex *taup, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gebrd(device_queue->val, m, n, reinterpret_cast*>(a), lda, d, e, reinterpret_cast*>(tauq), reinterpret_cast*>(taup), reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgebrd(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, double *d, double *e, double *tauq, double *taup, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gebrd(device_queue->val, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgebrd(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, float *d, float *e, float *tauq, float *taup, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gebrd(device_queue->val, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgebrd(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, double *d, double *e, double _Complex *tauq, double _Complex *taup, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = 
oneapi::mkl::lapack::gebrd(device_queue->val, m, n, reinterpret_cast*>(a), lda, d, e, reinterpret_cast*>(tauq), reinterpret_cast*>(taup), reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgeqrf(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::geqrf(device_queue->val, m, n, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgeqrf(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, double *tau, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::geqrf(device_queue->val, m, n, a, lda, tau, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgeqrf(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, float *tau, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::geqrf(device_queue->val, m, n, a, lda, tau, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgeqrf(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *scratchpad, int64_t scratchpad_size) { 
try { auto status = oneapi::mkl::lapack::geqrf(device_queue->val, m, n, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgesvd(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, int64_t n, float _Complex *a, int64_t lda, float *s, float _Complex *u, int64_t ldu, float _Complex *vt, int64_t ldvt, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gesvd(device_queue->val, convert(jobu), convert(jobvt), m, n, reinterpret_cast*>(a), lda, s, reinterpret_cast*>(u), ldu, reinterpret_cast*>(vt), ldvt, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgesvd(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, int64_t n, double _Complex *a, int64_t lda, double *s, double _Complex *u, int64_t ldu, double _Complex *vt, int64_t ldvt, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gesvd(device_queue->val, convert(jobu), convert(jobvt), m, n, reinterpret_cast*>(a), lda, s, reinterpret_cast*>(u), ldu, reinterpret_cast*>(vt), ldvt, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgesvd(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, int64_t n, double *a, int64_t lda, double *s, double *u, int64_t ldu, double *vt, int64_t ldvt, double *scratchpad, int64_t 
scratchpad_size) { try { auto status = oneapi::mkl::lapack::gesvd(device_queue->val, convert(jobu), convert(jobvt), m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgesvd(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, int64_t n, float *a, int64_t lda, float *s, float *u, int64_t ldu, float *vt, int64_t ldvt, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gesvd(device_queue->val, convert(jobu), convert(jobvt), m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgesvda_batch_strided(syclQueue_t device_queue, int64_t *iparm, int64_t *irank, int64_t m, int64_t n, float _Complex *a, int64_t lda, int64_t stride_a, float *s, int64_t stride_s, float _Complex *u, int64_t ldu, int64_t stride_u, float _Complex *vt, int64_t ldvt, int64_t stride_vt, float *tolerance, float *residual, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gesvda_batch(device_queue->val, iparm, irank, m, n, reinterpret_cast*>(a), lda, stride_a, s, stride_s, reinterpret_cast*>(u), ldu, stride_u, reinterpret_cast*>(vt), ldvt, stride_vt, *tolerance, residual, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgesvda_batch_strided(syclQueue_t device_queue, int64_t *iparm, int64_t *irank, int64_t m, int64_t n, double *a, int64_t lda, int64_t 
stride_a, double *s, int64_t stride_s, double *u, int64_t ldu, int64_t stride_u, double *vt, int64_t ldvt, int64_t stride_vt, double *tolerance, double *residual, int64_t batch_size, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gesvda_batch(device_queue->val, iparm, irank, m, n, a, lda, stride_a, s, stride_s, u, ldu, stride_u, vt, ldvt, stride_vt, *tolerance, residual, batch_size, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgesvda_batch_strided(syclQueue_t device_queue, int64_t *iparm, int64_t *irank, int64_t m, int64_t n, float *a, int64_t lda, int64_t stride_a, float *s, int64_t stride_s, float *u, int64_t ldu, int64_t stride_u, float *vt, int64_t ldvt, int64_t stride_vt, float *tolerance, float *residual, int64_t batch_size, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gesvda_batch(device_queue->val, iparm, irank, m, n, a, lda, stride_a, s, stride_s, u, ldu, stride_u, vt, ldvt, stride_vt, *tolerance, residual, batch_size, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgesvda_batch_strided(syclQueue_t device_queue, int64_t *iparm, int64_t *irank, int64_t m, int64_t n, double _Complex *a, int64_t lda, int64_t stride_a, double *s, int64_t stride_s, double _Complex *u, int64_t ldu, int64_t stride_u, double _Complex *vt, int64_t ldvt, int64_t stride_vt, double *tolerance, double *residual, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gesvda_batch(device_queue->val, iparm, irank, m, n, reinterpret_cast*>(a), lda, stride_a, s, stride_s, 
reinterpret_cast*>(u), ldu, stride_u, reinterpret_cast*>(vt), ldvt, stride_vt, *tolerance, residual, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgetrf(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, int64_t *ipiv, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrf(device_queue->val, m, n, reinterpret_cast*>(a), lda, ipiv, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgetrf(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, int64_t *ipiv, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrf(device_queue->val, m, n, a, lda, ipiv, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgetrf(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, int64_t *ipiv, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrf(device_queue->val, m, n, a, lda, ipiv, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgetrf(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, int64_t *ipiv, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = 
oneapi::mkl::lapack::getrf(device_queue->val, m, n, reinterpret_cast*>(a), lda, ipiv, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgetrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, float _Complex **a, int64_t *lda, int64_t **ipiv, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrf_batch(device_queue->val, m, n, reinterpret_cast**>(a), lda, ipiv, group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgetrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, double **a, int64_t *lda, int64_t **ipiv, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrf_batch(device_queue->val, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgetrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, float **a, int64_t *lda, int64_t **ipiv, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrf_batch(device_queue->val, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } 
extern "C" int onemklZgetrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, double _Complex **a, int64_t *lda, int64_t **ipiv, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrf_batch(device_queue->val, m, n, reinterpret_cast**>(a), lda, ipiv, group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgetrf_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrf_batch(device_queue->val, m, n, reinterpret_cast*>(a), lda, stride_a, ipiv, stride_ipiv, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgetrf_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrf_batch(device_queue->val, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgetrf_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, float 
*scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrf_batch(device_queue->val, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgetrf_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrf_batch(device_queue->val, m, n, reinterpret_cast*>(a), lda, stride_a, ipiv, stride_ipiv, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgetrfnp(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrfnp(device_queue->val, m, n, reinterpret_cast*>(a), lda, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgetrfnp(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrfnp(device_queue->val, m, n, a, lda, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgetrfnp(syclQueue_t device_queue, 
int64_t m, int64_t n, float *a, int64_t lda, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrfnp(device_queue->val, m, n, a, lda, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgetrfnp(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrfnp(device_queue->val, m, n, reinterpret_cast*>(a), lda, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgetrfnp_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, float _Complex **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrfnp_batch(device_queue->val, m, n, reinterpret_cast**>(a), lda, group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgetrfnp_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, double **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrfnp_batch(device_queue->val, m, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" 
int onemklSgetrfnp_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, float **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrfnp_batch(device_queue->val, m, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgetrfnp_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, double _Complex **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrfnp_batch(device_queue->val, m, n, reinterpret_cast**>(a), lda, group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgetrfnp_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, int64_t stride_a, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrfnp_batch(device_queue->val, m, n, reinterpret_cast*>(a), lda, stride_a, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgetrfnp_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, int64_t stride_a, int64_t batch_size, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrfnp_batch(device_queue->val, m, n, a, lda, stride_a, batch_size, scratchpad, 
scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgetrfnp_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, int64_t stride_a, int64_t batch_size, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrfnp_batch(device_queue->val, m, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgetrfnp_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, int64_t stride_a, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrfnp_batch(device_queue->val, m, n, reinterpret_cast*>(a), lda, stride_a, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgetri(syclQueue_t device_queue, int64_t n, float _Complex *a, int64_t lda, int64_t *ipiv, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getri(device_queue->val, n, reinterpret_cast*>(a), lda, ipiv, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgetri(syclQueue_t device_queue, int64_t n, double *a, int64_t lda, int64_t *ipiv, double *scratchpad, int64_t scratchpad_size) { try { auto status = 
oneapi::mkl::lapack::getri(device_queue->val, n, a, lda, ipiv, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgetri(syclQueue_t device_queue, int64_t n, float *a, int64_t lda, int64_t *ipiv, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getri(device_queue->val, n, a, lda, ipiv, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgetri(syclQueue_t device_queue, int64_t n, double _Complex *a, int64_t lda, int64_t *ipiv, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getri(device_queue->val, n, reinterpret_cast*>(a), lda, ipiv, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgetrs(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, int64_t *ipiv, float _Complex *b, int64_t ldb, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrs(device_queue->val, convert(trans), n, nrhs, reinterpret_cast*>(a), lda, ipiv, reinterpret_cast*>(b), ldb, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgetrs(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, double *a, int64_t lda, int64_t *ipiv, double *b, int64_t ldb, 
double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrs(device_queue->val, convert(trans), n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgetrs(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, float *a, int64_t lda, int64_t *ipiv, float *b, int64_t ldb, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrs(device_queue->val, convert(trans), n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgetrs(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, int64_t *ipiv, double _Complex *b, int64_t ldb, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrs(device_queue->val, convert(trans), n, nrhs, reinterpret_cast*>(a), lda, ipiv, reinterpret_cast*>(b), ldb, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgetrs_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, float _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrs_batch(device_queue->val, convert(trans), n, nrhs, reinterpret_cast*>(a), lda, stride_a, ipiv, 
stride_ipiv, reinterpret_cast*>(b), ldb, stride_b, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgetrs_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, double *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, double *b, int64_t ldb, int64_t stride_b, int64_t batch_size, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrs_batch(device_queue->val, convert(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgetrs_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, float *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, float *b, int64_t ldb, int64_t stride_b, int64_t batch_size, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrs_batch(device_queue->val, convert(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgetrs_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, double _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = 
oneapi::mkl::lapack::getrs_batch(device_queue->val, convert(trans), n, nrhs, reinterpret_cast*>(a), lda, stride_a, ipiv, stride_ipiv, reinterpret_cast*>(b), ldb, stride_b, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgetrsnp_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrsnp_batch(device_queue->val, convert(trans), n, nrhs, reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(b), ldb, stride_b, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgetrsnp_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, int64_t batch_size, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrsnp_batch(device_queue->val, convert(trans), n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgetrsnp_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, int64_t batch_size, float *scratchpad, int64_t 
scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrsnp_batch(device_queue->val, convert(trans), n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgetrsnp_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrsnp_batch(device_queue->val, convert(trans), n, nrhs, reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(b), ldb, stride_b, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCheev(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float *w, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::heev(device_queue->val, convert(jobz), convert(uplo), n, reinterpret_cast*>(a), lda, w, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZheev(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double *w, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::heev(device_queue->val, convert(jobz), convert(uplo), n, reinterpret_cast*>(a), lda, w, 
reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCheevd(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float *w, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::heevd(device_queue->val, convert(jobz), convert(uplo), n, reinterpret_cast*>(a), lda, w, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZheevd(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double *w, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::heevd(device_queue->val, convert(jobz), convert(uplo), n, reinterpret_cast*>(a), lda, w, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCheevx(syclQueue_t device_queue, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float *vl, float *vu, int64_t il, int64_t iu, float *abstol, int64_t *m, float *w, float _Complex *z, int64_t ldz, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::heevx(device_queue->val, convert(jobz), convert(range), convert(uplo), n, reinterpret_cast*>(a), lda, *vl, *vu, il, iu, *abstol, m, w, reinterpret_cast*>(z), ldz, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const 
oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZheevx(syclQueue_t device_queue, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double *vl, double *vu, int64_t il, int64_t iu, double *abstol, int64_t *m, double *w, double _Complex *z, int64_t ldz, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::heevx(device_queue->val, convert(jobz), convert(range), convert(uplo), n, reinterpret_cast*>(a), lda, *vl, *vu, il, iu, *abstol, m, w, reinterpret_cast*>(z), ldz, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklChegvd(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float *w, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::hegvd(device_queue->val, itype, convert(jobz), convert(uplo), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, w, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZhegvd(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double *w, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::hegvd(device_queue->val, itype, convert(jobz), convert(uplo), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, w, reinterpret_cast*>(scratchpad), scratchpad_size, {}); 
device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklChegvx(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float *vl, float *vu, int64_t il, int64_t iu, float *abstol, int64_t *m, float *w, float _Complex *z, int64_t ldz, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::hegvx(device_queue->val, itype, convert(jobz), convert(range), convert(uplo), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, *vl, *vu, il, iu, *abstol, m, w, reinterpret_cast*>(z), ldz, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZhegvx(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double *vl, double *vu, int64_t il, int64_t iu, double *abstol, int64_t *m, double *w, double _Complex *z, int64_t ldz, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::hegvx(device_queue->val, itype, convert(jobz), convert(range), convert(uplo), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, *vl, *vu, il, iu, *abstol, m, w, reinterpret_cast*>(z), ldz, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklChetrd(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float *d, float *e, float 
_Complex *tau, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::hetrd(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, d, e, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZhetrd(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double *d, double *e, double _Complex *tau, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::hetrd(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, d, e, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklChetrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, int64_t *ipiv, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::hetrf(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, ipiv, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZhetrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, int64_t *ipiv, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::hetrf(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, ipiv, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const 
oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSorgbr(syclQueue_t device_queue, onemklGenerate vec, int64_t m, int64_t n, int64_t k, float *a, int64_t lda, float *tau, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::orgbr(device_queue->val, convert(vec), m, n, k, a, lda, tau, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDorgbr(syclQueue_t device_queue, onemklGenerate vec, int64_t m, int64_t n, int64_t k, double *a, int64_t lda, double *tau, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::orgbr(device_queue->val, convert(vec), m, n, k, a, lda, tau, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDorgqr(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, double *a, int64_t lda, double *tau, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::orgqr(device_queue->val, m, n, k, a, lda, tau, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSorgqr(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, float *a, int64_t lda, float *tau, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::orgqr(device_queue->val, m, n, k, a, lda, tau, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } 
catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDormqr(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, double *a, int64_t lda, double *tau, double *c, int64_t ldc, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::ormqr(device_queue->val, convert(side), convert(trans), m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSormqr(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, float *a, int64_t lda, float *tau, float *c, int64_t ldc, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::ormqr(device_queue->val, convert(side), convert(trans), m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCsteqr(syclQueue_t device_queue, onemklCompz compz, int64_t n, float *d, float *e, float _Complex *z, int64_t ldz, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::steqr(device_queue->val, convert(compz), n, d, e, reinterpret_cast*>(z), ldz, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsteqr(syclQueue_t device_queue, onemklCompz compz, int64_t n, double *d, double *e, double *z, int64_t ldz, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::steqr(device_queue->val, convert(compz), n, 
d, e, z, ldz, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsteqr(syclQueue_t device_queue, onemklCompz compz, int64_t n, float *d, float *e, float *z, int64_t ldz, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::steqr(device_queue->val, convert(compz), n, d, e, z, ldz, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsteqr(syclQueue_t device_queue, onemklCompz compz, int64_t n, double *d, double *e, double _Complex *z, int64_t ldz, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::steqr(device_queue->val, convert(compz), n, d, e, reinterpret_cast*>(z), ldz, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsyev(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *w, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::syev(device_queue->val, convert(jobz), convert(uplo), n, a, lda, w, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsyev(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *w, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::syev(device_queue->val, 
convert(jobz), convert(uplo), n, a, lda, w, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsyevd(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *w, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::syevd(device_queue->val, convert(jobz), convert(uplo), n, a, lda, w, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsyevd(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *w, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::syevd(device_queue->val, convert(jobz), convert(uplo), n, a, lda, w, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsyevx(syclQueue_t device_queue, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *vl, double *vu, int64_t il, int64_t iu, double *abstol, int64_t *m, double *w, double *z, int64_t ldz, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::syevx(device_queue->val, convert(jobz), convert(range), convert(uplo), n, a, lda, *vl, *vu, il, iu, *abstol, m, w, z, ldz, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsyevx(syclQueue_t device_queue, onemklCompz jobz, 
onemklRangev range, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *vl, float *vu, int64_t il, int64_t iu, float *abstol, int64_t *m, float *w, float *z, int64_t ldz, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::syevx(device_queue->val, convert(jobz), convert(range), convert(uplo), n, a, lda, *vl, *vu, il, iu, *abstol, m, w, z, ldz, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsygvd(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *b, int64_t ldb, double *w, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::sygvd(device_queue->val, itype, convert(jobz), convert(uplo), n, a, lda, b, ldb, w, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsygvd(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *b, int64_t ldb, float *w, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::sygvd(device_queue->val, itype, convert(jobz), convert(uplo), n, a, lda, b, ldb, w, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsygvx(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *b, int64_t ldb, double *vl, double *vu, int64_t il, int64_t iu, double *abstol, int64_t *m, double *w, double *z, int64_t ldz, double 
*scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::sygvx(device_queue->val, itype, convert(jobz), convert(range), convert(uplo), n, a, lda, b, ldb, *vl, *vu, il, iu, *abstol, m, w, z, ldz, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsygvx(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *b, int64_t ldb, float *vl, float *vu, int64_t il, int64_t iu, float *abstol, int64_t *m, float *w, float *z, int64_t ldz, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::sygvx(device_queue->val, itype, convert(jobz), convert(range), convert(uplo), n, a, lda, b, ldb, *vl, *vu, il, iu, *abstol, m, w, z, ldz, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsytrd(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *d, double *e, double *tau, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::sytrd(device_queue->val, convert(uplo), n, a, lda, d, e, tau, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsytrd(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *d, float *e, float *tau, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::sytrd(device_queue->val, convert(uplo), n, a, lda, d, e, tau, scratchpad, scratchpad_size, {}); 
device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCtrtrs(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::trtrs(device_queue->val, convert(uplo), convert(trans), convert(diag), n, nrhs, reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDtrtrs(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, double *a, int64_t lda, double *b, int64_t ldb, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::trtrs(device_queue->val, convert(uplo), convert(trans), convert(diag), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklStrtrs(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, float *a, int64_t lda, float *b, int64_t ldb, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::trtrs(device_queue->val, convert(uplo), convert(trans), convert(diag), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" 
int onemklZtrtrs(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::trtrs(device_queue->val, convert(uplo), convert(trans), convert(diag), n, nrhs, reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCungbr(syclQueue_t device_queue, onemklGenerate vec, int64_t m, int64_t n, int64_t k, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::ungbr(device_queue->val, convert(vec), m, n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZungbr(syclQueue_t device_queue, onemklGenerate vec, int64_t m, int64_t n, int64_t k, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::ungbr(device_queue->val, convert(vec), m, n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCungqr(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *scratchpad, 
int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::ungqr(device_queue->val, m, n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZungqr(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::ungqr(device_queue->val, m, n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCunmqr(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *c, int64_t ldc, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::unmqr(device_queue->val, convert(side), convert(trans), m, n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(c), ldc, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZunmqr(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *c, int64_t ldc, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::unmqr(device_queue->val, convert(side), convert(trans), m, n, k, 
reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(c), ldc, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgerqf(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, float *tau, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gerqf(device_queue->val, m, n, a, lda, tau, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgerqf(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, double *tau, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gerqf(device_queue->val, m, n, a, lda, tau, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgerqf(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gerqf(device_queue->val, m, n, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgerqf(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = 
oneapi::mkl::lapack::gerqf(device_queue->val, m, n, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSormrq(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, float *a, int64_t lda, float *tau, float *c, int64_t ldc, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::ormrq(device_queue->val, convert(side), convert(trans), m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDormrq(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, double *a, int64_t lda, double *tau, double *c, int64_t ldc, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::ormrq(device_queue->val, convert(side), convert(trans), m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCunmrq(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *c, int64_t ldc, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::unmrq(device_queue->val, convert(side), convert(trans), m, n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(c), ldc, reinterpret_cast*>(scratchpad), scratchpad_size, {}); 
device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZunmrq(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *c, int64_t ldc, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::unmrq(device_queue->val, convert(side), convert(trans), m, n, k, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(c), ldc, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsytrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, int64_t *ipiv, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::sytrf(device_queue->val, convert(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsytrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, int64_t *ipiv, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::sytrf(device_queue->val, convert(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCsytrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, int64_t *ipiv, float _Complex *scratchpad, int64_t scratchpad_size) { try { 
auto status = oneapi::mkl::lapack::sytrf(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, ipiv, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsytrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, int64_t *ipiv, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::sytrf(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, ipiv, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSorgtr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *tau, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::orgtr(device_queue->val, convert(uplo), n, a, lda, tau, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDorgtr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *tau, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::orgtr(device_queue->val, convert(uplo), n, a, lda, tau, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCungtr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *scratchpad, int64_t 
scratchpad_size) { try { auto status = oneapi::mkl::lapack::ungtr(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZungtr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::ungtr(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSormtr(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, float *a, int64_t lda, float *tau, float *c, int64_t ldc, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::ormtr(device_queue->val, convert(side), convert(uplo), convert(trans), m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDormtr(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, double *a, int64_t lda, double *tau, double *c, int64_t ldc, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::ormtr(device_queue->val, convert(side), convert(uplo), convert(trans), m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const 
oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCunmtr(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *c, int64_t ldc, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::unmtr(device_queue->val, convert(side), convert(uplo), convert(trans), m, n, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(c), ldc, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZunmtr(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *c, int64_t ldc, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::unmtr(device_queue->val, convert(side), convert(uplo), convert(trans), m, n, reinterpret_cast*>(a), lda, reinterpret_cast*>(tau), reinterpret_cast*>(c), ldc, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgels(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, float *a, int64_t lda, float *b, int64_t ldb, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gels(device_queue->val, convert(trans), m, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch 
(const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgels(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, double *a, int64_t lda, double *b, int64_t ldb, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gels(device_queue->val, convert(trans), m, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgels(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gels(device_queue->val, convert(trans), m, n, nrhs, reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgels(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gels(device_queue->val, convert(trans), m, n, nrhs, reinterpret_cast*>(a), lda, reinterpret_cast*>(b), ldb, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSpotrf_batch(syclQueue_t device_queue, onemklUplo *uplo, int64_t *n, float **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t 
scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrf_batch(device_queue->val, convert(uplo, group_count), n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDpotrf_batch(syclQueue_t device_queue, onemklUplo *uplo, int64_t *n, double **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrf_batch(device_queue->val, convert(uplo, group_count), n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCpotrf_batch(syclQueue_t device_queue, onemklUplo *uplo, int64_t *n, float _Complex **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrf_batch(device_queue->val, convert(uplo, group_count), n, reinterpret_cast**>(a), lda, group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZpotrf_batch(syclQueue_t device_queue, onemklUplo *uplo, int64_t *n, double _Complex **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrf_batch(device_queue->val, convert(uplo, group_count), n, reinterpret_cast**>(a), lda, group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } 
catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSpotrs_batch(syclQueue_t device_queue, onemklUplo *uplo, int64_t *n, int64_t *nrhs, float **a, int64_t *lda, float **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrs_batch(device_queue->val, convert(uplo, group_count), n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDpotrs_batch(syclQueue_t device_queue, onemklUplo *uplo, int64_t *n, int64_t *nrhs, double **a, int64_t *lda, double **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrs_batch(device_queue->val, convert(uplo, group_count), n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCpotrs_batch(syclQueue_t device_queue, onemklUplo *uplo, int64_t *n, int64_t *nrhs, float _Complex **a, int64_t *lda, float _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrs_batch(device_queue->val, convert(uplo, group_count), n, nrhs, reinterpret_cast**>(a), lda, reinterpret_cast**>(b), ldb, group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const 
sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZpotrs_batch(syclQueue_t device_queue, onemklUplo *uplo, int64_t *n, int64_t *nrhs, double _Complex **a, int64_t *lda, double _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrs_batch(device_queue->val, convert(uplo, group_count), n, nrhs, reinterpret_cast**>(a), lda, reinterpret_cast**>(b), ldb, group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgeinv_batch(syclQueue_t device_queue, int64_t *n, float **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::geinv_batch(device_queue->val, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgeinv_batch(syclQueue_t device_queue, int64_t *n, double **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::geinv_batch(device_queue->val, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgeinv_batch(syclQueue_t device_queue, int64_t *n, float _Complex **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = 
oneapi::mkl::lapack::geinv_batch(device_queue->val, n, reinterpret_cast**>(a), lda, group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgeinv_batch(syclQueue_t device_queue, int64_t *n, double _Complex **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::geinv_batch(device_queue->val, n, reinterpret_cast**>(a), lda, group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgetrs_batch(syclQueue_t device_queue, onemklTranspose *trans, int64_t *n, int64_t *nrhs, float **a, int64_t *lda, int64_t **ipiv, float **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrs_batch(device_queue->val, convert(trans, group_count), n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgetrs_batch(syclQueue_t device_queue, onemklTranspose *trans, int64_t *n, int64_t *nrhs, double **a, int64_t *lda, int64_t **ipiv, double **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrs_batch(device_queue->val, convert(trans, group_count), n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, {}); 
device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgetrs_batch(syclQueue_t device_queue, onemklTranspose *trans, int64_t *n, int64_t *nrhs, float _Complex **a, int64_t *lda, int64_t **ipiv, float _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrs_batch(device_queue->val, convert(trans, group_count), n, nrhs, reinterpret_cast**>(a), lda, ipiv, reinterpret_cast**>(b), ldb, group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgetrs_batch(syclQueue_t device_queue, onemklTranspose *trans, int64_t *n, int64_t *nrhs, double _Complex **a, int64_t *lda, int64_t **ipiv, double _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getrs_batch(device_queue->val, convert(trans, group_count), n, nrhs, reinterpret_cast**>(a), lda, ipiv, reinterpret_cast**>(b), ldb, group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgetri_batch(syclQueue_t device_queue, int64_t *n, float **a, int64_t *lda, int64_t **ipiv, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getri_batch(device_queue->val, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, {}); 
device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgetri_batch(syclQueue_t device_queue, int64_t *n, double **a, int64_t *lda, int64_t **ipiv, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getri_batch(device_queue->val, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgetri_batch(syclQueue_t device_queue, int64_t *n, float _Complex **a, int64_t *lda, int64_t **ipiv, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getri_batch(device_queue->val, n, reinterpret_cast**>(a), lda, ipiv, group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgetri_batch(syclQueue_t device_queue, int64_t *n, double _Complex **a, int64_t *lda, int64_t **ipiv, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getri_batch(device_queue->val, n, reinterpret_cast**>(a), lda, ipiv, group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgeqrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, float **a, int64_t *lda, float **tau, 
int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::geqrf_batch(device_queue->val, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgeqrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, double **a, int64_t *lda, double **tau, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::geqrf_batch(device_queue->val, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgeqrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, float _Complex **a, int64_t *lda, float _Complex **tau, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::geqrf_batch(device_queue->val, m, n, reinterpret_cast**>(a), lda, reinterpret_cast**>(tau), group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgeqrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, double _Complex **a, int64_t *lda, double _Complex **tau, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::geqrf_batch(device_queue->val, m, n, reinterpret_cast**>(a), lda, reinterpret_cast**>(tau), group_count, group_sizes, 
reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSorgqr_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, int64_t *k, float **a, int64_t *lda, float **tau, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::orgqr_batch(device_queue->val, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDorgqr_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, int64_t *k, double **a, int64_t *lda, double **tau, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::orgqr_batch(device_queue->val, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCungqr_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, int64_t *k, float _Complex **a, int64_t *lda, float _Complex **tau, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::ungqr_batch(device_queue->val, m, n, k, reinterpret_cast**>(a), lda, reinterpret_cast**>(tau), group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int 
onemklZungqr_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, int64_t *k, double _Complex **a, int64_t *lda, double _Complex **tau, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::ungqr_batch(device_queue->val, m, n, k, reinterpret_cast**>(a), lda, reinterpret_cast**>(tau), group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSormqr_batch(syclQueue_t device_queue, onemklSide *side, onemklTranspose *trans, int64_t *m, int64_t *n, int64_t *k, float **a, int64_t *lda, float **tau, float **c, int64_t *ldc, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::ormqr_batch(device_queue->val, convert(side, group_count), convert(trans, group_count), m, n, k, a, lda, tau, c, ldc, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDormqr_batch(syclQueue_t device_queue, onemklSide *side, onemklTranspose *trans, int64_t *m, int64_t *n, int64_t *k, double **a, int64_t *lda, double **tau, double **c, int64_t *ldc, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::ormqr_batch(device_queue->val, convert(side, group_count), convert(trans, group_count), m, n, k, a, lda, tau, c, ldc, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; 
} extern "C" int onemklCunmqr_batch(syclQueue_t device_queue, onemklSide *side, onemklTranspose *trans, int64_t *m, int64_t *n, int64_t *k, float _Complex **a, int64_t *lda, float _Complex **tau, float _Complex **c, int64_t *ldc, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::unmqr_batch(device_queue->val, convert(side, group_count), convert(trans, group_count), m, n, k, reinterpret_cast**>(a), lda, reinterpret_cast**>(tau), reinterpret_cast**>(c), ldc, group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZunmqr_batch(syclQueue_t device_queue, onemklSide *side, onemklTranspose *trans, int64_t *m, int64_t *n, int64_t *k, double _Complex **a, int64_t *lda, double _Complex **tau, double _Complex **c, int64_t *ldc, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::unmqr_batch(device_queue->val, convert(side, group_count), convert(trans, group_count), m, n, k, reinterpret_cast**>(a), lda, reinterpret_cast**>(tau), reinterpret_cast**>(c), ldc, group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklStrtrs_batch(syclQueue_t device_queue, onemklUplo *uplo, onemklTranspose *trans, onemklDiag *diag, int64_t *n, int64_t *nrhs, float **a, int64_t *lda, float **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::trtrs_batch(device_queue->val, convert(uplo, group_count), 
convert(trans, group_count), convert(diag, group_count), n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDtrtrs_batch(syclQueue_t device_queue, onemklUplo *uplo, onemklTranspose *trans, onemklDiag *diag, int64_t *n, int64_t *nrhs, double **a, int64_t *lda, double **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::trtrs_batch(device_queue->val, convert(uplo, group_count), convert(trans, group_count), convert(diag, group_count), n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCtrtrs_batch(syclQueue_t device_queue, onemklUplo *uplo, onemklTranspose *trans, onemklDiag *diag, int64_t *n, int64_t *nrhs, float _Complex **a, int64_t *lda, float _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::trtrs_batch(device_queue->val, convert(uplo, group_count), convert(trans, group_count), convert(diag, group_count), n, nrhs, reinterpret_cast**>(a), lda, reinterpret_cast**>(b), ldb, group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZtrtrs_batch(syclQueue_t device_queue, onemklUplo *uplo, onemklTranspose *trans, onemklDiag *diag, int64_t *n, int64_t *nrhs, double _Complex **a, int64_t *lda, 
double _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::trtrs_batch(device_queue->val, convert(uplo, group_count), convert(trans, group_count), convert(diag, group_count), n, nrhs, reinterpret_cast**>(a), lda, reinterpret_cast**>(b), ldb, group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgels_batch(syclQueue_t device_queue, onemklTranspose *trans, int64_t *m, int64_t *n, int64_t *nrhs, float **a, int64_t *lda, float **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gels_batch(device_queue->val, convert(trans, group_count), m, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgels_batch(syclQueue_t device_queue, onemklTranspose *trans, int64_t *m, int64_t *n, int64_t *nrhs, double **a, int64_t *lda, double **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gels_batch(device_queue->val, convert(trans, group_count), m, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgels_batch(syclQueue_t device_queue, onemklTranspose *trans, int64_t *m, int64_t *n, int64_t *nrhs, float 
_Complex **a, int64_t *lda,float _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gels_batch(device_queue->val, convert(trans, group_count), m, n, nrhs, reinterpret_cast**>(a), lda,reinterpret_cast**>(b), ldb, group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgels_batch(syclQueue_t device_queue, onemklTranspose *trans, int64_t *m, int64_t *n, int64_t *nrhs, double _Complex **a, int64_t *lda, double _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gels_batch(device_queue->val, convert(trans, group_count), m, n, nrhs, reinterpret_cast**>(a), lda, reinterpret_cast**>(b), ldb, group_count, group_sizes, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSpotrf_batch_strided(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, int64_t stride_a, int64_t batch_size, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrf_batch(device_queue->val, convert(uplo), n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDpotrf_batch_strided(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, int64_t stride_a, int64_t batch_size, double 
*scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrf_batch(device_queue->val, convert(uplo), n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCpotrf_batch_strided(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, int64_t stride_a, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrf_batch(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, stride_a, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZpotrf_batch_strided(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, int64_t stride_a, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrf_batch(device_queue->val, convert(uplo), n, reinterpret_cast*>(a), lda, stride_a, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSpotrs_batch_strided(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, int64_t batch_size, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrs_batch(device_queue->val, convert(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, {}); 
device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDpotrs_batch_strided(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, int64_t batch_size, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrs_batch(device_queue->val, convert(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCpotrs_batch_strided(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrs_batch(device_queue->val, convert(uplo), n, nrhs, reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(b), ldb, stride_b, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZpotrs_batch_strided(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::potrs_batch(device_queue->val, convert(uplo), n, nrhs, reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(b), ldb, stride_b, batch_size, reinterpret_cast*>(scratchpad), 
scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgeqrf_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, int64_t stride_a, float *tau, int64_t stride_tau, int64_t batch_size, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::geqrf_batch(device_queue->val, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgeqrf_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, int64_t stride_a, double *tau, int64_t stride_tau, int64_t batch_size, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::geqrf_batch(device_queue->val, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgeqrf_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *tau, int64_t stride_tau, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::geqrf_batch(device_queue->val, m, n, reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(tau), stride_tau, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int 
onemklZgeqrf_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *tau, int64_t stride_tau, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::geqrf_batch(device_queue->val, m, n, reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(tau), stride_tau, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSorgqr_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, float *a, int64_t lda, int64_t stride_a, float *tau, int64_t stride_tau, int64_t batch_size, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::orgqr_batch(device_queue->val, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDorgqr_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, double *a, int64_t lda, int64_t stride_a, double *tau, int64_t stride_tau, int64_t batch_size, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::orgqr_batch(device_queue->val, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCungqr_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *tau, int64_t stride_tau, int64_t 
batch_size, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::ungqr_batch(device_queue->val, m, n, k, reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(tau), stride_tau, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZungqr_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *tau, int64_t stride_tau, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::ungqr_batch(device_queue->val, m, n, k, reinterpret_cast*>(a), lda, stride_a, reinterpret_cast*>(tau), stride_tau, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgetri_batch_strided(syclQueue_t device_queue, int64_t n, float *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getri_batch(device_queue->val, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgetri_batch_strided(syclQueue_t device_queue, int64_t n, double *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getri_batch(device_queue->val, n, a, lda, 
stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgetri_batch_strided(syclQueue_t device_queue, int64_t n, float _Complex *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getri_batch(device_queue->val, n, reinterpret_cast*>(a), lda, stride_a, ipiv, stride_ipiv, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgetri_batch_strided(syclQueue_t device_queue, int64_t n, double _Complex *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::getri_batch(device_queue->val, n, reinterpret_cast*>(a), lda, stride_a, ipiv, stride_ipiv, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSgels_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, float *_a, int64_t lda, int64_t stride_a, float *_b, int64_t ldb, int64_t stride_b, int64_t batch_size, float *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gels_batch(device_queue->val, convert(trans), m, n, nrhs, _a, lda, stride_a, _b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const 
oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDgels_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, double *_a, int64_t lda, int64_t stride_a, double *_b, int64_t ldb, int64_t stride_b, int64_t batch_size, double *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gels_batch(device_queue->val, convert(trans), m, n, nrhs, _a, lda, stride_a, _b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCgels_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, float _Complex *_a, int64_t lda, int64_t stride_a, float _Complex *_b, int64_t ldb, int64_t stride_b, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gels_batch(device_queue->val, convert(trans), m, n, nrhs, reinterpret_cast*>(_a), lda, stride_a, reinterpret_cast*>(_b), ldb, stride_b, batch_size, reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZgels_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, double _Complex *_a, int64_t lda, int64_t stride_a, double _Complex *_b, int64_t ldb, int64_t stride_b, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size) { try { auto status = oneapi::mkl::lapack::gels_batch(device_queue->val, convert(trans), m, n, nrhs, reinterpret_cast*>(_a), lda, stride_a, reinterpret_cast*>(_b), ldb, stride_b, batch_size, 
reinterpret_cast*>(scratchpad), scratchpad_size, {}); device_queue->val.wait_and_throw(); } catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int64_t onemklSgebrd_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::gebrd_scratchpad_size(device_queue->val, m, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgebrd_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::gebrd_scratchpad_size(device_queue->val, m, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgebrd_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::gebrd_scratchpad_size>(device_queue->val, m, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgebrd_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::gebrd_scratchpad_size>(device_queue->val, m, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgels_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::gels_scratchpad_size(device_queue->val, convert(trans), m, n, nrhs, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgels_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::gels_scratchpad_size(device_queue->val, convert(trans), m, n, nrhs, lda, ldb); device_queue->val.wait_and_throw(); return 
scratchpad_size; } extern "C" int64_t onemklCgels_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::gels_scratchpad_size>(device_queue->val, convert(trans), m, n, nrhs, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgels_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::gels_scratchpad_size>(device_queue->val, convert(trans), m, n, nrhs, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgeqrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::geqrf_scratchpad_size(device_queue->val, m, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgeqrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::geqrf_scratchpad_size(device_queue->val, m, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgeqrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::geqrf_scratchpad_size>(device_queue->val, m, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgeqrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::geqrf_scratchpad_size>(device_queue->val, m, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgerqf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { int64_t scratchpad_size = 
oneapi::mkl::lapack::gerqf_scratchpad_size(device_queue->val, m, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgerqf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::gerqf_scratchpad_size(device_queue->val, m, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgerqf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::gerqf_scratchpad_size>(device_queue->val, m, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgerqf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::gerqf_scratchpad_size>(device_queue->val, m, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgesv_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::gesv_scratchpad_size(device_queue->val, n, nrhs, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgesv_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::gesv_scratchpad_size(device_queue->val, n, nrhs, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgesv_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::gesv_scratchpad_size>(device_queue->val, n, nrhs, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgesv_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { int64_t scratchpad_size = 
oneapi::mkl::lapack::gesv_scratchpad_size>(device_queue->val, n, nrhs, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgesvd_scratchpad_size(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, int64_t n, int64_t lda, int64_t ldu, int64_t ldvt) { int64_t scratchpad_size = oneapi::mkl::lapack::gesvd_scratchpad_size(device_queue->val, convert(jobu), convert(jobvt), m, n, lda, ldu, ldvt); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgesvd_scratchpad_size(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, int64_t n, int64_t lda, int64_t ldu, int64_t ldvt) { int64_t scratchpad_size = oneapi::mkl::lapack::gesvd_scratchpad_size(device_queue->val, convert(jobu), convert(jobvt), m, n, lda, ldu, ldvt); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgesvd_scratchpad_size(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, int64_t n, int64_t lda, int64_t ldu, int64_t ldvt) { int64_t scratchpad_size = oneapi::mkl::lapack::gesvd_scratchpad_size>(device_queue->val, convert(jobu), convert(jobvt), m, n, lda, ldu, ldvt); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgesvd_scratchpad_size(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, int64_t n, int64_t lda, int64_t ldu, int64_t ldvt) { int64_t scratchpad_size = oneapi::mkl::lapack::gesvd_scratchpad_size>(device_queue->val, convert(jobu), convert(jobvt), m, n, lda, ldu, ldvt); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgetrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::getrf_scratchpad_size(device_queue->val, m, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t 
onemklDgetrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::getrf_scratchpad_size(device_queue->val, m, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgetrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::getrf_scratchpad_size>(device_queue->val, m, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgetrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::getrf_scratchpad_size>(device_queue->val, m, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgetrfnp_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::getrfnp_scratchpad_size(device_queue->val, m, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgetrfnp_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::getrfnp_scratchpad_size(device_queue->val, m, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgetrfnp_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::getrfnp_scratchpad_size>(device_queue->val, m, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgetrfnp_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::getrfnp_scratchpad_size>(device_queue->val, m, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgetri_scratchpad_size(syclQueue_t device_queue, int64_t n, 
int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::getri_scratchpad_size(device_queue->val, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgetri_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::getri_scratchpad_size(device_queue->val, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgetri_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::getri_scratchpad_size>(device_queue->val, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgetri_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::getri_scratchpad_size>(device_queue->val, n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgetrs_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::getrs_scratchpad_size(device_queue->val, convert(trans), n, nrhs, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgetrs_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::getrs_scratchpad_size(device_queue->val, convert(trans), n, nrhs, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgetrs_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::getrs_scratchpad_size>(device_queue->val, convert(trans), n, nrhs, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t 
onemklZgetrs_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::getrs_scratchpad_size>(device_queue->val, convert(trans), n, nrhs, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCheev_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::heev_scratchpad_size>(device_queue->val, convert(jobz), convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZheev_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::heev_scratchpad_size>(device_queue->val, convert(jobz), convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCheevd_scratchpad_size(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::heevd_scratchpad_size>(device_queue->val, convert(jobz), convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZheevd_scratchpad_size(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::heevd_scratchpad_size>(device_queue->val, convert(jobz), convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCheevx_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, int64_t lda, float *vl, float *vu, int64_t il, int64_t iu, float *abstol, int64_t ldz) { int64_t scratchpad_size = oneapi::mkl::lapack::heevx_scratchpad_size>(device_queue->val, convert(jobz), convert(range), convert(uplo), n, lda, *vl, *vu, il, iu, 
*abstol, ldz); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZheevx_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, int64_t lda, double *vl, double *vu, int64_t il, int64_t iu, double *abstol, int64_t ldz) { int64_t scratchpad_size = oneapi::mkl::lapack::heevx_scratchpad_size>(device_queue->val, convert(jobz), convert(range), convert(uplo), n, lda, *vl, *vu, il, iu, *abstol, ldz); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklChegvd_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::hegvd_scratchpad_size>(device_queue->val, itype, convert(jobz), convert(uplo), n, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZhegvd_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::hegvd_scratchpad_size>(device_queue->val, itype, convert(jobz), convert(uplo), n, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklChegvx_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb, float *vl, float *vu, int64_t il, int64_t iu, float *abstol, int64_t ldz) { int64_t scratchpad_size = oneapi::mkl::lapack::hegvx_scratchpad_size>(device_queue->val, itype, convert(jobz), convert(range), convert(uplo), n, lda, ldb, *vl, *vu, il, iu, *abstol, ldz); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZhegvx_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb, double *vl, double *vu, int64_t il, 
int64_t iu, double *abstol, int64_t ldz) { int64_t scratchpad_size = oneapi::mkl::lapack::hegvx_scratchpad_size>(device_queue->val, itype, convert(jobz), convert(range), convert(uplo), n, lda, ldb, *vl, *vu, il, iu, *abstol, ldz); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklChetrd_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::hetrd_scratchpad_size>(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZhetrd_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::hetrd_scratchpad_size>(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklChetrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::hetrf_scratchpad_size>(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZhetrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::hetrf_scratchpad_size>(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSorgbr_scratchpad_size(syclQueue_t device_queue, onemklGenerate vect, int64_t m, int64_t n, int64_t k, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::orgbr_scratchpad_size(device_queue->val, convert(vect), m, n, k, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDorgbr_scratchpad_size(syclQueue_t device_queue, onemklGenerate vect, int64_t m, int64_t n, int64_t k, int64_t lda) { int64_t scratchpad_size = 
oneapi::mkl::lapack::orgbr_scratchpad_size(device_queue->val, convert(vect), m, n, k, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSorgqr_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::orgqr_scratchpad_size(device_queue->val, m, n, k, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDorgqr_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::orgqr_scratchpad_size(device_queue->val, m, n, k, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSorgtr_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::orgtr_scratchpad_size(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDorgtr_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::orgtr_scratchpad_size(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSormqr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc) { int64_t scratchpad_size = oneapi::mkl::lapack::ormqr_scratchpad_size(device_queue->val, convert(side), convert(trans), m, n, k, lda, ldc); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDormqr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc) { int64_t scratchpad_size = oneapi::mkl::lapack::ormqr_scratchpad_size(device_queue->val, convert(side), convert(trans), m, n, k, lda, ldc); 
device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSormrq_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc) { int64_t scratchpad_size = oneapi::mkl::lapack::ormrq_scratchpad_size(device_queue->val, convert(side), convert(trans), m, n, k, lda, ldc); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDormrq_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc) { int64_t scratchpad_size = oneapi::mkl::lapack::ormrq_scratchpad_size(device_queue->val, convert(side), convert(trans), m, n, k, lda, ldc); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSormtr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, int64_t lda, int64_t ldc) { int64_t scratchpad_size = oneapi::mkl::lapack::ormtr_scratchpad_size(device_queue->val, convert(side), convert(uplo), convert(trans), m, n, lda, ldc); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDormtr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, int64_t lda, int64_t ldc) { int64_t scratchpad_size = oneapi::mkl::lapack::ormtr_scratchpad_size(device_queue->val, convert(side), convert(uplo), convert(trans), m, n, lda, ldc); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSpotrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::potrf_scratchpad_size(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDpotrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, 
int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::potrf_scratchpad_size(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCpotrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::potrf_scratchpad_size>(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZpotrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::potrf_scratchpad_size>(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSpotri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::potri_scratchpad_size(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDpotri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::potri_scratchpad_size(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCpotri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::potri_scratchpad_size>(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZpotri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::potri_scratchpad_size>(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSpotrs_scratchpad_size(syclQueue_t 
device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::potrs_scratchpad_size(device_queue->val, convert(uplo), n, nrhs, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDpotrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::potrs_scratchpad_size(device_queue->val, convert(uplo), n, nrhs, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCpotrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::potrs_scratchpad_size>(device_queue->val, convert(uplo), n, nrhs, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZpotrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::potrs_scratchpad_size>(device_queue->val, convert(uplo), n, nrhs, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSsteqr_scratchpad_size(syclQueue_t device_queue, onemklCompz compz, int64_t n, int64_t ldz) { int64_t scratchpad_size = oneapi::mkl::lapack::steqr_scratchpad_size(device_queue->val, convert(compz), n, ldz); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDsteqr_scratchpad_size(syclQueue_t device_queue, onemklCompz compz, int64_t n, int64_t ldz) { int64_t scratchpad_size = oneapi::mkl::lapack::steqr_scratchpad_size(device_queue->val, convert(compz), n, ldz); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCsteqr_scratchpad_size(syclQueue_t device_queue, onemklCompz compz, int64_t n, int64_t ldz) { int64_t scratchpad_size = 
oneapi::mkl::lapack::steqr_scratchpad_size>(device_queue->val, convert(compz), n, ldz); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZsteqr_scratchpad_size(syclQueue_t device_queue, onemklCompz compz, int64_t n, int64_t ldz) { int64_t scratchpad_size = oneapi::mkl::lapack::steqr_scratchpad_size>(device_queue->val, convert(compz), n, ldz); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSsyev_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::syev_scratchpad_size(device_queue->val, convert(jobz), convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDsyev_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::syev_scratchpad_size(device_queue->val, convert(jobz), convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSsyevd_scratchpad_size(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::syevd_scratchpad_size(device_queue->val, convert(jobz), convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDsyevd_scratchpad_size(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::syevd_scratchpad_size(device_queue->val, convert(jobz), convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSsyevx_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, int64_t lda, float *vl, float *vu, int64_t il, int64_t iu, float *abstol, int64_t ldz) { int64_t scratchpad_size = 
oneapi::mkl::lapack::syevx_scratchpad_size(device_queue->val, convert(jobz), convert(range), convert(uplo), n, lda, *vl, *vu, il, iu, *abstol, ldz); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDsyevx_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, int64_t lda, double *vl, double *vu, int64_t il, int64_t iu, double *abstol, int64_t ldz) { int64_t scratchpad_size = oneapi::mkl::lapack::syevx_scratchpad_size(device_queue->val, convert(jobz), convert(range), convert(uplo), n, lda, *vl, *vu, il, iu, *abstol, ldz); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSsygvd_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::sygvd_scratchpad_size(device_queue->val, itype, convert(jobz), convert(uplo), n, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDsygvd_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::sygvd_scratchpad_size(device_queue->val, itype, convert(jobz), convert(uplo), n, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSsygvx_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb, float *vl, float *vu, int64_t il, int64_t iu, float *abstol, int64_t ldz) { int64_t scratchpad_size = oneapi::mkl::lapack::sygvx_scratchpad_size(device_queue->val, itype, convert(jobz), convert(range), convert(uplo), n, lda, ldb, *vl, *vu, il, iu, *abstol, ldz); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDsygvx_scratchpad_size(syclQueue_t device_queue, int64_t itype, 
onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb, double *vl, double *vu, int64_t il, int64_t iu, double *abstol, int64_t ldz) { int64_t scratchpad_size = oneapi::mkl::lapack::sygvx_scratchpad_size(device_queue->val, itype, convert(jobz), convert(range), convert(uplo), n, lda, ldb, *vl, *vu, il, iu, *abstol, ldz); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSsytrd_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::sytrd_scratchpad_size(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDsytrd_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::sytrd_scratchpad_size(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSsytrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::sytrf_scratchpad_size(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDsytrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::sytrf_scratchpad_size(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCsytrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::sytrf_scratchpad_size>(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZsytrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { 
int64_t scratchpad_size = oneapi::mkl::lapack::sytrf_scratchpad_size>(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklStrtri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklDiag diag, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::trtri_scratchpad_size(device_queue->val, convert(uplo), convert(diag), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDtrtri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklDiag diag, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::trtri_scratchpad_size(device_queue->val, convert(uplo), convert(diag), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCtrtri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklDiag diag, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::trtri_scratchpad_size>(device_queue->val, convert(uplo), convert(diag), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZtrtri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklDiag diag, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::trtri_scratchpad_size>(device_queue->val, convert(uplo), convert(diag), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklStrtrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::trtrs_scratchpad_size(device_queue->val, convert(uplo), convert(trans), convert(diag), n, nrhs, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDtrtrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, 
int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::trtrs_scratchpad_size(device_queue->val, convert(uplo), convert(trans), convert(diag), n, nrhs, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCtrtrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::trtrs_scratchpad_size>(device_queue->val, convert(uplo), convert(trans), convert(diag), n, nrhs, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZtrtrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb) { int64_t scratchpad_size = oneapi::mkl::lapack::trtrs_scratchpad_size>(device_queue->val, convert(uplo), convert(trans), convert(diag), n, nrhs, lda, ldb); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCungbr_scratchpad_size(syclQueue_t device_queue, onemklGenerate vect, int64_t m, int64_t n, int64_t k, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::ungbr_scratchpad_size>(device_queue->val, convert(vect), m, n, k, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZungbr_scratchpad_size(syclQueue_t device_queue, onemklGenerate vect, int64_t m, int64_t n, int64_t k, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::ungbr_scratchpad_size>(device_queue->val, convert(vect), m, n, k, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCungqr_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::ungqr_scratchpad_size>(device_queue->val, m, n, k, lda); device_queue->val.wait_and_throw(); return 
scratchpad_size; } extern "C" int64_t onemklZungqr_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::ungqr_scratchpad_size>(device_queue->val, m, n, k, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCungtr_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::ungtr_scratchpad_size>(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZungtr_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda) { int64_t scratchpad_size = oneapi::mkl::lapack::ungtr_scratchpad_size>(device_queue->val, convert(uplo), n, lda); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCunmqr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc) { int64_t scratchpad_size = oneapi::mkl::lapack::unmqr_scratchpad_size>(device_queue->val, convert(side), convert(trans), m, n, k, lda, ldc); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZunmqr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc) { int64_t scratchpad_size = oneapi::mkl::lapack::unmqr_scratchpad_size>(device_queue->val, convert(side), convert(trans), m, n, k, lda, ldc); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCunmrq_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc) { int64_t scratchpad_size = oneapi::mkl::lapack::unmrq_scratchpad_size>(device_queue->val, convert(side), convert(trans), m, n, k, lda, ldc); device_queue->val.wait_and_throw(); 
return scratchpad_size; } extern "C" int64_t onemklZunmrq_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc) { int64_t scratchpad_size = oneapi::mkl::lapack::unmrq_scratchpad_size>(device_queue->val, convert(side), convert(trans), m, n, k, lda, ldc); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCunmtr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, int64_t lda, int64_t ldc) { int64_t scratchpad_size = oneapi::mkl::lapack::unmtr_scratchpad_size>(device_queue->val, convert(side), convert(uplo), convert(trans), m, n, lda, ldc); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZunmtr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, int64_t lda, int64_t ldc) { int64_t scratchpad_size = oneapi::mkl::lapack::unmtr_scratchpad_size>(device_queue->val, convert(side), convert(uplo), convert(trans), m, n, lda, ldc); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgeinv_batch_scratchpad_size(syclQueue_t device_queue, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::geinv_batch_scratchpad_size(device_queue->val, n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgeinv_batch_scratchpad_size(syclQueue_t device_queue, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::geinv_batch_scratchpad_size(device_queue->val, n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgeinv_batch_scratchpad_size(syclQueue_t device_queue, int64_t* n, int64_t* lda, int64_t 
group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::geinv_batch_scratchpad_size>(device_queue->val, n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgeinv_batch_scratchpad_size(syclQueue_t device_queue, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::geinv_batch_scratchpad_size>(device_queue->val, n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgels_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose* trans, int64_t* m, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::gels_batch_scratchpad_size(device_queue->val, convert(trans, group_count), m, n, nrhs, lda, ldb, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgels_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose* trans, int64_t* m, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::gels_batch_scratchpad_size(device_queue->val, convert(trans, group_count), m, n, nrhs, lda, ldb, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgels_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose* trans, int64_t* m, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::gels_batch_scratchpad_size>(device_queue->val, convert(trans, group_count), m, n, nrhs, lda, ldb, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgels_batch_scratchpad_size(syclQueue_t device_queue, 
onemklTranspose* trans, int64_t* m, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::gels_batch_scratchpad_size>(device_queue->val, convert(trans, group_count), m, n, nrhs, lda, ldb, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgels_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::gels_batch_scratchpad_size(device_queue->val, convert(trans), m, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgels_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::gels_batch_scratchpad_size(device_queue->val, convert(trans), m, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgels_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::gels_batch_scratchpad_size>(device_queue->val, convert(trans), m, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgels_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) { int64_t scratchpad_size = 
oneapi::mkl::lapack::gels_batch_scratchpad_size>(device_queue->val, convert(trans), m, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgeqrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::geqrf_batch_scratchpad_size(device_queue->val, m, n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgeqrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::geqrf_batch_scratchpad_size(device_queue->val, m, n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgeqrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::geqrf_batch_scratchpad_size>(device_queue->val, m, n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgeqrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::geqrf_batch_scratchpad_size>(device_queue->val, m, n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgeqrf_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::geqrf_batch_scratchpad_size(device_queue->val, m, n, lda, stride_a, stride_tau, batch_size); device_queue->val.wait_and_throw(); return 
scratchpad_size; } extern "C" int64_t onemklDgeqrf_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::geqrf_batch_scratchpad_size(device_queue->val, m, n, lda, stride_a, stride_tau, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgeqrf_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::geqrf_batch_scratchpad_size>(device_queue->val, m, n, lda, stride_a, stride_tau, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgeqrf_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::geqrf_batch_scratchpad_size>(device_queue->val, m, n, lda, stride_a, stride_tau, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgesvda_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_s, int64_t ldu, int64_t stride_u, int64_t ldvt, int64_t stride_vt, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::gesvda_batch_scratchpad_size(device_queue->val, m, n, lda, stride_a, stride_s, ldu, stride_u, ldvt, stride_vt, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgesvda_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_s, int64_t ldu, int64_t stride_u, int64_t ldvt, int64_t stride_vt, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::gesvda_batch_scratchpad_size(device_queue->val, m, 
n, lda, stride_a, stride_s, ldu, stride_u, ldvt, stride_vt, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgesvda_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_s, int64_t ldu, int64_t stride_u, int64_t ldvt, int64_t stride_vt, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::gesvda_batch_scratchpad_size>(device_queue->val, m, n, lda, stride_a, stride_s, ldu, stride_u, ldvt, stride_vt, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgesvda_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_s, int64_t ldu, int64_t stride_u, int64_t ldvt, int64_t stride_vt, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::gesvda_batch_scratchpad_size>(device_queue->val, m, n, lda, stride_a, stride_s, ldu, stride_u, ldvt, stride_vt, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgetrf_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::getrf_batch_scratchpad_size(device_queue->val, m, n, lda, stride_a, stride_ipiv, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgetrf_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::getrf_batch_scratchpad_size(device_queue->val, m, n, lda, stride_a, stride_ipiv, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgetrf_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t 
stride_a, int64_t stride_ipiv, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::getrf_batch_scratchpad_size>(device_queue->val, m, n, lda, stride_a, stride_ipiv, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgetrf_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::getrf_batch_scratchpad_size>(device_queue->val, m, n, lda, stride_a, stride_ipiv, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgetrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::getrf_batch_scratchpad_size(device_queue->val, m, n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgetrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::getrf_batch_scratchpad_size(device_queue->val, m, n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgetrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::getrf_batch_scratchpad_size>(device_queue->val, m, n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgetrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::getrf_batch_scratchpad_size>(device_queue->val, m, n, lda, group_count, 
group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgetrfnp_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::getrfnp_batch_scratchpad_size(device_queue->val, m, n, lda, stride_a, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgetrfnp_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::getrfnp_batch_scratchpad_size(device_queue->val, m, n, lda, stride_a, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgetrfnp_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::getrfnp_batch_scratchpad_size>(device_queue->val, m, n, lda, stride_a, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgetrfnp_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::getrfnp_batch_scratchpad_size>(device_queue->val, m, n, lda, stride_a, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgetrfnp_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::getrfnp_batch_scratchpad_size(device_queue->val, m, n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgetrfnp_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t 
group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::getrfnp_batch_scratchpad_size(device_queue->val, m, n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgetrfnp_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::getrfnp_batch_scratchpad_size>(device_queue->val, m, n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgetrfnp_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::getrfnp_batch_scratchpad_size>(device_queue->val, m, n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgetri_batch_scratchpad_size(syclQueue_t device_queue, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::getri_batch_scratchpad_size(device_queue->val, n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgetri_batch_scratchpad_size(syclQueue_t device_queue, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::getri_batch_scratchpad_size(device_queue->val, n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgetri_batch_scratchpad_size(syclQueue_t device_queue, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::getri_batch_scratchpad_size>(device_queue->val, n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" 
int64_t onemklZgetri_batch_scratchpad_size(syclQueue_t device_queue, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::getri_batch_scratchpad_size>(device_queue->val, n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgetri_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::getri_batch_scratchpad_size(device_queue->val, n, lda, stride_a, stride_ipiv, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgetri_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::getri_batch_scratchpad_size(device_queue->val, n, lda, stride_a, stride_ipiv, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgetri_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::getri_batch_scratchpad_size>(device_queue->val, n, lda, stride_a, stride_ipiv, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgetri_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::getri_batch_scratchpad_size>(device_queue->val, n, lda, stride_a, stride_ipiv, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgetrs_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose* trans, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t 
group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::getrs_batch_scratchpad_size(device_queue->val, convert(trans, group_count), n, nrhs, lda, ldb, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgetrs_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose* trans, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::getrs_batch_scratchpad_size(device_queue->val, convert(trans, group_count), n, nrhs, lda, ldb, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgetrs_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose* trans, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::getrs_batch_scratchpad_size>(device_queue->val, convert(trans, group_count), n, nrhs, lda, ldb, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgetrs_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose* trans, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::getrs_batch_scratchpad_size>(device_queue->val, convert(trans, group_count), n, nrhs, lda, ldb, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgetrs_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t ldb, int64_t stride_b, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::getrs_batch_scratchpad_size(device_queue->val, convert(trans), n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); 
device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDgetrs_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t ldb, int64_t stride_b, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::getrs_batch_scratchpad_size(device_queue->val, convert(trans), n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgetrs_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t ldb, int64_t stride_b, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::getrs_batch_scratchpad_size>(device_queue->val, convert(trans), n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgetrs_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t ldb, int64_t stride_b, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::getrs_batch_scratchpad_size>(device_queue->val, convert(trans), n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSgetrsnp_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::getrsnp_batch_scratchpad_size(device_queue->val, convert(trans), n, nrhs, lda, stride_a, ldb, stride_b, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t 
onemklDgetrsnp_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::getrsnp_batch_scratchpad_size(device_queue->val, convert(trans), n, nrhs, lda, stride_a, ldb, stride_b, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCgetrsnp_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::getrsnp_batch_scratchpad_size>(device_queue->val, convert(trans), n, nrhs, lda, stride_a, ldb, stride_b, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZgetrsnp_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::getrsnp_batch_scratchpad_size>(device_queue->val, convert(trans), n, nrhs, lda, stride_a, ldb, stride_b, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSorgqr_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* k, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::orgqr_batch_scratchpad_size(device_queue->val, m, n, k, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDorgqr_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* k, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::orgqr_batch_scratchpad_size(device_queue->val, m, n, k, lda, group_count, 
group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSorgqr_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::orgqr_batch_scratchpad_size(device_queue->val, m, n, k, lda, stride_a, stride_tau, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDorgqr_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::orgqr_batch_scratchpad_size(device_queue->val, m, n, k, lda, stride_a, stride_tau, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSormqr_batch_scratchpad_size(syclQueue_t device_queue, onemklSide* side, onemklTranspose* trans, int64_t* m, int64_t* n, int64_t* k, int64_t* lda, int64_t* ldc, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::ormqr_batch_scratchpad_size(device_queue->val, convert(side, group_count), convert(trans, group_count), m, n, k, lda, ldc, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDormqr_batch_scratchpad_size(syclQueue_t device_queue, onemklSide* side, onemklTranspose* trans, int64_t* m, int64_t* n, int64_t* k, int64_t* lda, int64_t* ldc, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::ormqr_batch_scratchpad_size(device_queue->val, convert(side, group_count), convert(trans, group_count), m, n, k, lda, ldc, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSpotrf_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, int64_t* n, int64_t* lda, int64_t group_count, 
int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::potrf_batch_scratchpad_size(device_queue->val, convert(uplo, group_count), n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDpotrf_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::potrf_batch_scratchpad_size(device_queue->val, convert(uplo, group_count), n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCpotrf_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::potrf_batch_scratchpad_size>(device_queue->val, convert(uplo, group_count), n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZpotrf_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::potrf_batch_scratchpad_size>(device_queue->val, convert(uplo, group_count), n, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSpotrf_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::potrf_batch_scratchpad_size(device_queue->val, convert(uplo), n, lda, stride_a, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDpotrf_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size) { int64_t scratchpad_size = 
oneapi::mkl::lapack::potrf_batch_scratchpad_size(device_queue->val, convert(uplo), n, lda, stride_a, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCpotrf_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::potrf_batch_scratchpad_size>(device_queue->val, convert(uplo), n, lda, stride_a, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZpotrf_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::potrf_batch_scratchpad_size>(device_queue->val, convert(uplo), n, lda, stride_a, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSpotrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::potrs_batch_scratchpad_size(device_queue->val, convert(uplo, group_count), n, nrhs, lda, ldb, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDpotrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::potrs_batch_scratchpad_size(device_queue->val, convert(uplo, group_count), n, nrhs, lda, ldb, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCpotrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = 
oneapi::mkl::lapack::potrs_batch_scratchpad_size>(device_queue->val, convert(uplo, group_count), n, nrhs, lda, ldb, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZpotrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::potrs_batch_scratchpad_size>(device_queue->val, convert(uplo, group_count), n, nrhs, lda, ldb, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklSpotrs_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::potrs_batch_scratchpad_size(device_queue->val, convert(uplo), n, nrhs, lda, stride_a, ldb, stride_b, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDpotrs_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::potrs_batch_scratchpad_size(device_queue->val, convert(uplo), n, nrhs, lda, stride_a, ldb, stride_b, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCpotrs_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::potrs_batch_scratchpad_size>(device_queue->val, convert(uplo), n, nrhs, lda, stride_a, ldb, stride_b, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t 
onemklZpotrs_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::potrs_batch_scratchpad_size>(device_queue->val, convert(uplo), n, nrhs, lda, stride_a, ldb, stride_b, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklStrtrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, onemklTranspose* trans, onemklDiag* diag, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::trtrs_batch_scratchpad_size(device_queue->val, convert(uplo, group_count), convert(trans, group_count), convert(diag, group_count), n, nrhs, lda, ldb, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklDtrtrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, onemklTranspose* trans, onemklDiag* diag, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::trtrs_batch_scratchpad_size(device_queue->val, convert(uplo, group_count), convert(trans, group_count), convert(diag, group_count), n, nrhs, lda, ldb, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCtrtrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, onemklTranspose* trans, onemklDiag* diag, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::trtrs_batch_scratchpad_size>(device_queue->val, convert(uplo, group_count), convert(trans, group_count), convert(diag, group_count), n, nrhs, lda, ldb, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; 
} extern "C" int64_t onemklZtrtrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, onemklTranspose* trans, onemklDiag* diag, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::trtrs_batch_scratchpad_size>(device_queue->val, convert(uplo, group_count), convert(trans, group_count), convert(diag, group_count), n, nrhs, lda, ldb, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCungqr_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* k, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::ungqr_batch_scratchpad_size>(device_queue->val, m, n, k, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZungqr_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* k, int64_t* lda, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::ungqr_batch_scratchpad_size>(device_queue->val, m, n, k, lda, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCungqr_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::ungqr_batch_scratchpad_size>(device_queue->val, m, n, k, lda, stride_a, stride_tau, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZungqr_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size) { int64_t scratchpad_size = oneapi::mkl::lapack::ungqr_batch_scratchpad_size>(device_queue->val, m, n, k, lda, stride_a, 
stride_tau, batch_size); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklCunmqr_batch_scratchpad_size(syclQueue_t device_queue, onemklSide* side, onemklTranspose* trans, int64_t* m, int64_t* n, int64_t* k, int64_t* lda, int64_t* ldc, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::unmqr_batch_scratchpad_size>(device_queue->val, convert(side, group_count), convert(trans, group_count), m, n, k, lda, ldc, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } extern "C" int64_t onemklZunmqr_batch_scratchpad_size(syclQueue_t device_queue, onemklSide* side, onemklTranspose* trans, int64_t* m, int64_t* n, int64_t* k, int64_t* lda, int64_t* ldc, int64_t group_count, int64_t* group_sizes) { int64_t scratchpad_size = oneapi::mkl::lapack::unmqr_batch_scratchpad_size>(device_queue->val, convert(side, group_count), convert(trans, group_count), m, n, k, lda, ldc, group_count, group_sizes); device_queue->val.wait_and_throw(); return scratchpad_size; } // SPARSE extern "C" int onemklXsparse_init_matrix_handle(matrix_handle_t *p_spMat) { oneapi::mkl::sparse::init_matrix_handle((oneapi::mkl::sparse::matrix_handle_t*) p_spMat); return 0; } extern "C" int onemklXsparse_release_matrix_handle(syclQueue_t device_queue, matrix_handle_t *p_spMat) { try { auto status = oneapi::mkl::sparse::release_matrix_handle(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t*) p_spMat, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsparse_set_csr_data(syclQueue_t device_queue, matrix_handle_t spMat, int32_t nrows, int32_t ncols, onemklIndex index, int32_t *row_ptr, int32_t *col_ind, float *values) { try { auto status = oneapi::mkl::sparse::set_csr_data(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) spMat, nrows, ncols, convert(index), row_ptr, col_ind, values, {}); 
device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsparse_set_csr_data_64(syclQueue_t device_queue, matrix_handle_t spMat, int64_t nrows, int64_t ncols, onemklIndex index, int64_t *row_ptr, int64_t *col_ind, float *values) { try { auto status = oneapi::mkl::sparse::set_csr_data(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) spMat, nrows, ncols, convert(index), row_ptr, col_ind, values, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsparse_set_csr_data(syclQueue_t device_queue, matrix_handle_t spMat, int32_t nrows, int32_t ncols, onemklIndex index, int32_t *row_ptr, int32_t *col_ind, double *values) { try { auto status = oneapi::mkl::sparse::set_csr_data(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) spMat, nrows, ncols, convert(index), row_ptr, col_ind, values, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsparse_set_csr_data_64(syclQueue_t device_queue, matrix_handle_t spMat, int64_t nrows, int64_t ncols, onemklIndex index, int64_t *row_ptr, int64_t *col_ind, double *values) { try { auto status = oneapi::mkl::sparse::set_csr_data(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) spMat, nrows, ncols, convert(index), row_ptr, col_ind, values, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCsparse_set_csr_data(syclQueue_t device_queue, matrix_handle_t spMat, int32_t nrows, int32_t ncols, onemklIndex index, int32_t *row_ptr, int32_t *col_ind, float _Complex *values) { try { auto status = oneapi::mkl::sparse::set_csr_data(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) spMat, nrows, ncols, convert(index), row_ptr, col_ind, reinterpret_cast*>(values), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { 
return -1; } return 0; } extern "C" int onemklCsparse_set_csr_data_64(syclQueue_t device_queue, matrix_handle_t spMat, int64_t nrows, int64_t ncols, onemklIndex index, int64_t *row_ptr, int64_t *col_ind, float _Complex *values) { try { auto status = oneapi::mkl::sparse::set_csr_data(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) spMat, nrows, ncols, convert(index), row_ptr, col_ind, reinterpret_cast*>(values), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsparse_set_csr_data(syclQueue_t device_queue, matrix_handle_t spMat, int32_t nrows, int32_t ncols, onemklIndex index, int32_t *row_ptr, int32_t *col_ind, double _Complex *values) { try { auto status = oneapi::mkl::sparse::set_csr_data(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) spMat, nrows, ncols, convert(index), row_ptr, col_ind, reinterpret_cast*>(values), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsparse_set_csr_data_64(syclQueue_t device_queue, matrix_handle_t spMat, int64_t nrows, int64_t ncols, onemklIndex index, int64_t *row_ptr, int64_t *col_ind, double _Complex *values) { try { auto status = oneapi::mkl::sparse::set_csr_data(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) spMat, nrows, ncols, convert(index), row_ptr, col_ind, reinterpret_cast*>(values), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsparse_set_coo_data(syclQueue_t device_queue, matrix_handle_t spMat, int32_t nrows, int32_t ncols, int32_t nnz, onemklIndex index, int32_t *row_ind, int32_t *col_ind, float *values) { try { auto status = oneapi::mkl::sparse::set_coo_data(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) spMat, nrows, ncols, nnz, convert(index), row_ind, col_ind, values, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { 
return -1; } return 0; } extern "C" int onemklSsparse_set_coo_data_64(syclQueue_t device_queue, matrix_handle_t spMat, int64_t nrows, int64_t ncols, int64_t nnz, onemklIndex index, int64_t *row_ind, int64_t *col_ind, float *values) { try { auto status = oneapi::mkl::sparse::set_coo_data(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) spMat, nrows, ncols, nnz, convert(index), row_ind, col_ind, values, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsparse_set_coo_data(syclQueue_t device_queue, matrix_handle_t spMat, int32_t nrows, int32_t ncols, int32_t nnz, onemklIndex index, int32_t *row_ind, int32_t *col_ind, double *values) { try { auto status = oneapi::mkl::sparse::set_coo_data(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) spMat, nrows, ncols, nnz, convert(index), row_ind, col_ind, values, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsparse_set_coo_data_64(syclQueue_t device_queue, matrix_handle_t spMat, int64_t nrows, int64_t ncols, int64_t nnz, onemklIndex index, int64_t *row_ind, int64_t *col_ind, double *values) { try { auto status = oneapi::mkl::sparse::set_coo_data(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) spMat, nrows, ncols, nnz, convert(index), row_ind, col_ind, values, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCsparse_set_coo_data(syclQueue_t device_queue, matrix_handle_t spMat, int32_t nrows, int32_t ncols, int32_t nnz, onemklIndex index, int32_t *row_ind, int32_t *col_ind, float _Complex *values) { try { auto status = oneapi::mkl::sparse::set_coo_data(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) spMat, nrows, ncols, nnz, convert(index), row_ind, col_ind, reinterpret_cast*>(values), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return 
-1; } return 0; } extern "C" int onemklCsparse_set_coo_data_64(syclQueue_t device_queue, matrix_handle_t spMat, int64_t nrows, int64_t ncols, int64_t nnz, onemklIndex index, int64_t *row_ind, int64_t *col_ind, float _Complex *values) { try { auto status = oneapi::mkl::sparse::set_coo_data(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) spMat, nrows, ncols, nnz, convert(index), row_ind, col_ind, reinterpret_cast*>(values), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsparse_set_coo_data(syclQueue_t device_queue, matrix_handle_t spMat, int32_t nrows, int32_t ncols, int32_t nnz, onemklIndex index, int32_t *row_ind, int32_t *col_ind, double _Complex *values) { try { auto status = oneapi::mkl::sparse::set_coo_data(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) spMat, nrows, ncols, nnz, convert(index), row_ind, col_ind, reinterpret_cast*>(values), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsparse_set_coo_data_64(syclQueue_t device_queue, matrix_handle_t spMat, int64_t nrows, int64_t ncols, int64_t nnz, onemklIndex index, int64_t *row_ind, int64_t *col_ind, double _Complex *values) { try { auto status = oneapi::mkl::sparse::set_coo_data(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) spMat, nrows, ncols, nnz, convert(index), row_ind, col_ind, reinterpret_cast*>(values), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklXsparse_init_matmat_descr(matmat_descr_t *p_desc) { oneapi::mkl::sparse::init_matmat_descr((oneapi::mkl::sparse::matmat_descr_t*) p_desc); return 0; } extern "C" int onemklXsparse_release_matmat_descr(matmat_descr_t *p_desc) { oneapi::mkl::sparse::release_matmat_descr((oneapi::mkl::sparse::matmat_descr_t*) p_desc); return 0; } extern "C" int onemklXsparse_init_omatconvert_descr(syclQueue_t 
device_queue, omatconvert_descr_t *p_descr) { try { oneapi::mkl::sparse::init_omatconvert_descr(device_queue->val, (oneapi::mkl::sparse::omatconvert_descr_t*) p_descr); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklXsparse_release_omatconvert_descr(syclQueue_t device_queue, omatconvert_descr_t descr) { try { auto status = oneapi::mkl::sparse::release_omatconvert_descr(device_queue->val, (oneapi::mkl::sparse::omatconvert_descr_t) descr, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklXsparse_init_omatadd_descr(syclQueue_t device_queue, omatadd_descr_t *p_omatadd_desc) { try { oneapi::mkl::sparse::init_omatadd_descr(device_queue->val, (oneapi::mkl::sparse::omatadd_descr_t*) p_omatadd_desc); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklXsparse_release_omatadd_descr(syclQueue_t device_queue, omatadd_descr_t omatadd_desc) { try { auto status = oneapi::mkl::sparse::release_omatadd_descr(device_queue->val, (oneapi::mkl::sparse::omatadd_descr_t) omatadd_desc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklXsparse_omatcopy(syclQueue_t device_queue, onemklTranspose transpose_val, matrix_handle_t spMat_in, matrix_handle_t spMat_out) { try { auto status = oneapi::mkl::sparse::omatcopy(device_queue->val, convert(transpose_val), (oneapi::mkl::sparse::matrix_handle_t) spMat_in, (oneapi::mkl::sparse::matrix_handle_t) spMat_out, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklXsparse_sort_matrix(syclQueue_t device_queue, matrix_handle_t spMat) { try { auto status = oneapi::mkl::sparse::sort_matrix(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) spMat, {}); device_queue->val.wait_and_throw(); } catch 
(const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsparse_update_diagonal_values(syclQueue_t device_queue, matrix_handle_t spMat, int64_t length, float *new_diag_values) { try { auto status = oneapi::mkl::sparse::update_diagonal_values(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) spMat, length, new_diag_values, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsparse_update_diagonal_values(syclQueue_t device_queue, matrix_handle_t spMat, int64_t length, double *new_diag_values) { try { auto status = oneapi::mkl::sparse::update_diagonal_values(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) spMat, length, new_diag_values, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCsparse_update_diagonal_values(syclQueue_t device_queue, matrix_handle_t spMat, int64_t length, float _Complex *new_diag_values) { try { auto status = oneapi::mkl::sparse::update_diagonal_values(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) spMat, length, reinterpret_cast*>(new_diag_values), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsparse_update_diagonal_values(syclQueue_t device_queue, matrix_handle_t spMat, int64_t length, double _Complex *new_diag_values) { try { auto status = oneapi::mkl::sparse::update_diagonal_values(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) spMat, length, reinterpret_cast*>(new_diag_values), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklXsparse_optimize_gemv(syclQueue_t device_queue, onemklTranspose opA, matrix_handle_t A) { try { auto status = oneapi::mkl::sparse::optimize_gemv(device_queue->val, convert(opA), (oneapi::mkl::sparse::matrix_handle_t) A, {}); device_queue->val.wait_and_throw(); } 
catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklXsparse_optimize_trmv(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, matrix_handle_t A) { try { auto status = oneapi::mkl::sparse::optimize_trmv(device_queue->val, convert(uplo_val), convert(opA), convert(diag_val), (oneapi::mkl::sparse::matrix_handle_t) A, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklXsparse_optimize_trsv(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, matrix_handle_t A) { try { auto status = oneapi::mkl::sparse::optimize_trsv(device_queue->val, convert(uplo_val), convert(opA), convert(diag_val), (oneapi::mkl::sparse::matrix_handle_t) A, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklXsparse_optimize_gemm(syclQueue_t device_queue, onemklTranspose opA, matrix_handle_t A) { try { auto status = oneapi::mkl::sparse::optimize_gemm(device_queue->val, convert(opA), (oneapi::mkl::sparse::matrix_handle_t) A, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklXsparse_optimize_gemm_advanced(syclQueue_t device_queue, onemklLayout layout_val, onemklTranspose opA, onemklTranspose opB, matrix_handle_t A, int64_t columns) { try { auto status = oneapi::mkl::sparse::optimize_gemm(device_queue->val, convert(layout_val), convert(opA), convert(opB), (oneapi::mkl::sparse::matrix_handle_t) A, columns, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklXsparse_optimize_trsm(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, matrix_handle_t A) { try { auto status = oneapi::mkl::sparse::optimize_trsm(device_queue->val, convert(uplo_val), convert(opA), convert(diag_val), 
(oneapi::mkl::sparse::matrix_handle_t) A, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklXsparse_optimize_trsm_advanced(syclQueue_t device_queue, onemklLayout layout_val, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, matrix_handle_t A, int64_t columns) { try { auto status = oneapi::mkl::sparse::optimize_trsm(device_queue->val, convert(layout_val), convert(uplo_val), convert(opA), convert(diag_val), (oneapi::mkl::sparse::matrix_handle_t) A, columns, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsparse_gemv(syclQueue_t device_queue, onemklTranspose opA, float *alpha, matrix_handle_t A, float *x, float *beta, float *y) { try { auto status = oneapi::mkl::sparse::gemv(device_queue->val, convert(opA), *alpha, (oneapi::mkl::sparse::matrix_handle_t) A, x, *beta, y, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsparse_gemv(syclQueue_t device_queue, onemklTranspose opA, double *alpha, matrix_handle_t A, double *x, double *beta, double *y) { try { auto status = oneapi::mkl::sparse::gemv(device_queue->val, convert(opA), *alpha, (oneapi::mkl::sparse::matrix_handle_t) A, x, *beta, y, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCsparse_gemv(syclQueue_t device_queue, onemklTranspose opA, float _Complex *alpha, matrix_handle_t A, float _Complex *x, float _Complex *beta, float _Complex *y) { try { auto status = oneapi::mkl::sparse::gemv(device_queue->val, convert(opA), *reinterpret_cast*>(alpha), (oneapi::mkl::sparse::matrix_handle_t) A, reinterpret_cast*>(x), *reinterpret_cast*>(beta), reinterpret_cast*>(y), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsparse_gemv(syclQueue_t 
device_queue, onemklTranspose opA, double _Complex *alpha, matrix_handle_t A, double _Complex *x, double _Complex *beta, double _Complex *y) { try { auto status = oneapi::mkl::sparse::gemv(device_queue->val, convert(opA), *reinterpret_cast*>(alpha), (oneapi::mkl::sparse::matrix_handle_t) A, reinterpret_cast*>(x), *reinterpret_cast*>(beta), reinterpret_cast*>(y), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsparse_gemvdot(syclQueue_t device_queue, onemklTranspose opA, float *alpha, matrix_handle_t A, float *x, float *beta, float *y, float *d) { try { auto status = oneapi::mkl::sparse::gemvdot(device_queue->val, convert(opA), *alpha, (oneapi::mkl::sparse::matrix_handle_t) A, x, *beta, y, d, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsparse_gemvdot(syclQueue_t device_queue, onemklTranspose opA, double *alpha, matrix_handle_t A, double *x, double *beta, double *y, double *d) { try { auto status = oneapi::mkl::sparse::gemvdot(device_queue->val, convert(opA), *alpha, (oneapi::mkl::sparse::matrix_handle_t) A, x, *beta, y, d, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCsparse_gemvdot(syclQueue_t device_queue, onemklTranspose opA, float _Complex *alpha, matrix_handle_t A, float _Complex *x, float _Complex *beta, float _Complex *y, float _Complex *d) { try { auto status = oneapi::mkl::sparse::gemvdot(device_queue->val, convert(opA), *reinterpret_cast*>(alpha), (oneapi::mkl::sparse::matrix_handle_t) A, reinterpret_cast*>(x), *reinterpret_cast*>(beta), reinterpret_cast*>(y), reinterpret_cast*>(d), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsparse_gemvdot(syclQueue_t device_queue, onemklTranspose opA, double _Complex *alpha, matrix_handle_t A, double _Complex 
*x, double _Complex *beta, double _Complex *y, double _Complex *d) { try { auto status = oneapi::mkl::sparse::gemvdot(device_queue->val, convert(opA), *reinterpret_cast*>(alpha), (oneapi::mkl::sparse::matrix_handle_t) A, reinterpret_cast*>(x), *reinterpret_cast*>(beta), reinterpret_cast*>(y), reinterpret_cast*>(d), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsparse_symv(syclQueue_t device_queue, onemklUplo uplo_val, float *alpha, matrix_handle_t A, float *x, float *beta, float *y) { try { auto status = oneapi::mkl::sparse::symv(device_queue->val, convert(uplo_val), *alpha, (oneapi::mkl::sparse::matrix_handle_t) A, x, *beta, y, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsparse_symv(syclQueue_t device_queue, onemklUplo uplo_val, double *alpha, matrix_handle_t A, double *x, double *beta, double *y) { try { auto status = oneapi::mkl::sparse::symv(device_queue->val, convert(uplo_val), *alpha, (oneapi::mkl::sparse::matrix_handle_t) A, x, *beta, y, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCsparse_symv(syclQueue_t device_queue, onemklUplo uplo_val, float _Complex *alpha, matrix_handle_t A, float _Complex *x, float _Complex *beta, float _Complex *y) { try { auto status = oneapi::mkl::sparse::symv(device_queue->val, convert(uplo_val), *reinterpret_cast*>(alpha), (oneapi::mkl::sparse::matrix_handle_t) A, reinterpret_cast*>(x), *reinterpret_cast*>(beta), reinterpret_cast*>(y), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsparse_symv(syclQueue_t device_queue, onemklUplo uplo_val, double _Complex *alpha, matrix_handle_t A, double _Complex *x, double _Complex *beta, double _Complex *y) { try { auto status = oneapi::mkl::sparse::symv(device_queue->val, 
convert(uplo_val), *reinterpret_cast*>(alpha), (oneapi::mkl::sparse::matrix_handle_t) A, reinterpret_cast*>(x), *reinterpret_cast*>(beta), reinterpret_cast*>(y), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsparse_trmv(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, float *alpha, matrix_handle_t A, float *x, float *beta, float *y) { try { auto status = oneapi::mkl::sparse::trmv(device_queue->val, convert(uplo_val), convert(opA), convert(diag_val), *alpha, (oneapi::mkl::sparse::matrix_handle_t) A, x, *beta, y, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsparse_trmv(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, double *alpha, matrix_handle_t A, double *x, double *beta, double *y) { try { auto status = oneapi::mkl::sparse::trmv(device_queue->val, convert(uplo_val), convert(opA), convert(diag_val), *alpha, (oneapi::mkl::sparse::matrix_handle_t) A, x, *beta, y, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCsparse_trmv(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, float _Complex *alpha, matrix_handle_t A, float _Complex *x, float _Complex *beta, float _Complex *y) { try { auto status = oneapi::mkl::sparse::trmv(device_queue->val, convert(uplo_val), convert(opA), convert(diag_val), *reinterpret_cast*>(alpha), (oneapi::mkl::sparse::matrix_handle_t) A, reinterpret_cast*>(x), *reinterpret_cast*>(beta), reinterpret_cast*>(y), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsparse_trmv(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, double _Complex *alpha, matrix_handle_t A, double _Complex *x, double 
_Complex *beta, double _Complex *y) { try { auto status = oneapi::mkl::sparse::trmv(device_queue->val, convert(uplo_val), convert(opA), convert(diag_val), *reinterpret_cast*>(alpha), (oneapi::mkl::sparse::matrix_handle_t) A, reinterpret_cast*>(x), *reinterpret_cast*>(beta), reinterpret_cast*>(y), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsparse_trsv(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, float *alpha, matrix_handle_t A, float *x, float *y) { try { auto status = oneapi::mkl::sparse::trsv(device_queue->val, convert(uplo_val), convert(opA), convert(diag_val), *alpha, (oneapi::mkl::sparse::matrix_handle_t) A, x, y, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsparse_trsv(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, double *alpha, matrix_handle_t A, double *x, double *y) { try { auto status = oneapi::mkl::sparse::trsv(device_queue->val, convert(uplo_val), convert(opA), convert(diag_val), *alpha, (oneapi::mkl::sparse::matrix_handle_t) A, x, y, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCsparse_trsv(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, float _Complex *alpha, matrix_handle_t A, float _Complex *x, float _Complex *y) { try { auto status = oneapi::mkl::sparse::trsv(device_queue->val, convert(uplo_val), convert(opA), convert(diag_val), *reinterpret_cast*>(alpha), (oneapi::mkl::sparse::matrix_handle_t) A, reinterpret_cast*>(x), reinterpret_cast*>(y), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsparse_trsv(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, double _Complex *alpha, 
matrix_handle_t A, double _Complex *x, double _Complex *y) { try { auto status = oneapi::mkl::sparse::trsv(device_queue->val, convert(uplo_val), convert(opA), convert(diag_val), *reinterpret_cast*>(alpha), (oneapi::mkl::sparse::matrix_handle_t) A, reinterpret_cast*>(x), reinterpret_cast*>(y), {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsparse_gemm(syclQueue_t device_queue, onemklLayout layout_val, onemklTranspose opA, onemklTranspose opX, float *alpha, matrix_handle_t A, float *X, int64_t columns, int64_t ldx, float *beta, float *Y, int64_t ldy) { try { auto status = oneapi::mkl::sparse::gemm(device_queue->val, convert(layout_val), convert(opA), convert(opX), *alpha, (oneapi::mkl::sparse::matrix_handle_t) A, X, columns, ldx, *beta, Y, ldy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsparse_gemm(syclQueue_t device_queue, onemklLayout layout_val, onemklTranspose opA, onemklTranspose opX, double *alpha, matrix_handle_t A, double *X, int64_t columns, int64_t ldx, double *beta, double *Y, int64_t ldy) { try { auto status = oneapi::mkl::sparse::gemm(device_queue->val, convert(layout_val), convert(opA), convert(opX), *alpha, (oneapi::mkl::sparse::matrix_handle_t) A, X, columns, ldx, *beta, Y, ldy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCsparse_gemm(syclQueue_t device_queue, onemklLayout layout_val, onemklTranspose opA, onemklTranspose opX, float _Complex *alpha, matrix_handle_t A, float _Complex *X, int64_t columns, int64_t ldx, float _Complex *beta, float _Complex *Y, int64_t ldy) { try { auto status = oneapi::mkl::sparse::gemm(device_queue->val, convert(layout_val), convert(opA), convert(opX), *reinterpret_cast*>(alpha), (oneapi::mkl::sparse::matrix_handle_t) A, reinterpret_cast*>(X), columns, ldx, *reinterpret_cast*>(beta), 
reinterpret_cast*>(Y), ldy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsparse_gemm(syclQueue_t device_queue, onemklLayout layout_val, onemklTranspose opA, onemklTranspose opX, double _Complex *alpha, matrix_handle_t A, double _Complex *X, int64_t columns, int64_t ldx, double _Complex *beta, double _Complex *Y, int64_t ldy) { try { auto status = oneapi::mkl::sparse::gemm(device_queue->val, convert(layout_val), convert(opA), convert(opX), *reinterpret_cast*>(alpha), (oneapi::mkl::sparse::matrix_handle_t) A, reinterpret_cast*>(X), columns, ldx, *reinterpret_cast*>(beta), reinterpret_cast*>(Y), ldy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklSsparse_trsm(syclQueue_t device_queue, onemklLayout layout_val, onemklTranspose opA, onemklTranspose opX, onemklUplo uplo_val, onemklDiag diag_val, float *alpha, matrix_handle_t A, float *X, int64_t columns, int64_t ldx, float *Y, int64_t ldy) { try { auto status = oneapi::mkl::sparse::trsm(device_queue->val, convert(layout_val), convert(opA), convert(opX), convert(uplo_val), convert(diag_val), *alpha, (oneapi::mkl::sparse::matrix_handle_t) A, X, columns, ldx, Y, ldy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsparse_trsm(syclQueue_t device_queue, onemklLayout layout_val, onemklTranspose opA, onemklTranspose opX, onemklUplo uplo_val, onemklDiag diag_val, double *alpha, matrix_handle_t A, double *X, int64_t columns, int64_t ldx, double *Y, int64_t ldy) { try { auto status = oneapi::mkl::sparse::trsm(device_queue->val, convert(layout_val), convert(opA), convert(opX), convert(uplo_val), convert(diag_val), *alpha, (oneapi::mkl::sparse::matrix_handle_t) A, X, columns, ldx, Y, ldy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int 
onemklCsparse_trsm(syclQueue_t device_queue, onemklLayout layout_val, onemklTranspose opA, onemklTranspose opX, onemklUplo uplo_val, onemklDiag diag_val, float _Complex *alpha, matrix_handle_t A, float _Complex *X, int64_t columns, int64_t ldx, float _Complex *Y, int64_t ldy) { try { auto status = oneapi::mkl::sparse::trsm(device_queue->val, convert(layout_val), convert(opA), convert(opX), convert(uplo_val), convert(diag_val), *reinterpret_cast*>(alpha), (oneapi::mkl::sparse::matrix_handle_t) A, reinterpret_cast*>(X), columns, ldx, reinterpret_cast*>(Y), ldy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsparse_trsm(syclQueue_t device_queue, onemklLayout layout_val, onemklTranspose opA, onemklTranspose opX, onemklUplo uplo_val, onemklDiag diag_val, double _Complex *alpha, matrix_handle_t A, double _Complex *X, int64_t columns, int64_t ldx, double _Complex *Y, int64_t ldy) { try { auto status = oneapi::mkl::sparse::trsm(device_queue->val, convert(layout_val), convert(opA), convert(opX), convert(uplo_val), convert(diag_val), *reinterpret_cast*>(alpha), (oneapi::mkl::sparse::matrix_handle_t) A, reinterpret_cast*>(X), columns, ldx, reinterpret_cast*>(Y), ldy, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklXsparse_set_matmat_data(matmat_descr_t descr, onemklMatrixView viewA, onemklTranspose opA, onemklMatrixView viewB, onemklTranspose opB, onemklMatrixView viewC) { oneapi::mkl::sparse::set_matmat_data((oneapi::mkl::sparse::matmat_descr_t) descr, convert(viewA), convert(opA), convert(viewB), convert(opB), convert(viewC)); return 0; } extern "C" int onemklSsparse_matmatd(syclQueue_t device_queue, onemklLayout c_layout, onemklTranspose opA, onemklTranspose opB, float *alpha, matrix_handle_t A, matrix_handle_t B, float *beta, float *C, int64_t c_nrows, int64_t c_ncols, int64_t ldc) { try { auto status = 
oneapi::mkl::sparse::matmatd(device_queue->val, convert(c_layout), convert(opA), convert(opB), *alpha, (oneapi::mkl::sparse::matrix_handle_t) A, (oneapi::mkl::sparse::matrix_handle_t) B, *beta, C, c_nrows, c_ncols, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklDsparse_matmatd(syclQueue_t device_queue, onemklLayout c_layout, onemklTranspose opA, onemklTranspose opB, double *alpha, matrix_handle_t A, matrix_handle_t B, double *beta, double *C, int64_t c_nrows, int64_t c_ncols, int64_t ldc) { try { auto status = oneapi::mkl::sparse::matmatd(device_queue->val, convert(c_layout), convert(opA), convert(opB), *alpha, (oneapi::mkl::sparse::matrix_handle_t) A, (oneapi::mkl::sparse::matrix_handle_t) B, *beta, C, c_nrows, c_ncols, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklCsparse_matmatd(syclQueue_t device_queue, onemklLayout c_layout, onemklTranspose opA, onemklTranspose opB, float _Complex *alpha, matrix_handle_t A, matrix_handle_t B, float _Complex *beta, float _Complex *C, int64_t c_nrows, int64_t c_ncols, int64_t ldc) { try { auto status = oneapi::mkl::sparse::matmatd(device_queue->val, convert(c_layout), convert(opA), convert(opB), *reinterpret_cast*>(alpha), (oneapi::mkl::sparse::matrix_handle_t) A, (oneapi::mkl::sparse::matrix_handle_t) B, *reinterpret_cast*>(beta), reinterpret_cast*>(C), c_nrows, c_ncols, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklZsparse_matmatd(syclQueue_t device_queue, onemklLayout c_layout, onemklTranspose opA, onemklTranspose opB, double _Complex *alpha, matrix_handle_t A, matrix_handle_t B, double _Complex *beta, double _Complex *C, int64_t c_nrows, int64_t c_ncols, int64_t ldc) { try { auto status = oneapi::mkl::sparse::matmatd(device_queue->val, convert(c_layout), convert(opA), convert(opB), 
*reinterpret_cast*>(alpha), (oneapi::mkl::sparse::matrix_handle_t) A, (oneapi::mkl::sparse::matrix_handle_t) B, *reinterpret_cast*>(beta), reinterpret_cast*>(C), c_nrows, c_ncols, ldc, {}); device_queue->val.wait_and_throw(); } catch (const sycl::exception& e) { return -1; } return 0; } extern "C" int onemklXsparse_matmat(syclQueue_t device_queue, matrix_handle_t A, matrix_handle_t B, matrix_handle_t C, onemklMatmatRequest req, matmat_descr_t descr, int64_t *sizeTempBuffer, void *tempBuffer) { auto status = oneapi::mkl::sparse::matmat(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) A, (oneapi::mkl::sparse::matrix_handle_t) B, (oneapi::mkl::sparse::matrix_handle_t) C, convert(req), (oneapi::mkl::sparse::matmat_descr_t) descr, sizeTempBuffer, tempBuffer, {}); device_queue->val.wait_and_throw(); return 0; } // other // oneMKL keeps a cache of SYCL queues and tries to destroy them when unloading the library. // that is incompatible with oneAPI.jl destroying queues before that, so call mkl_free_buffers // to manually wipe the device cache when we're destroying queues. 
extern "C" int onemklDestroy() { mkl_free_buffers(); return 0; } ================================================ FILE: deps/src/onemkl.h ================================================ #pragma once #include "sycl.h" #include #include #ifdef __cplusplus extern "C" { #endif // BLAS types typedef enum { ONEMKL_TRANSPOSE_NONTRANS, ONEMKL_TRANSPOSE_TRANS, ONEMLK_TRANSPOSE_CONJTRANS } onemklTranspose; typedef enum { ONEMKL_UPLO_UPPER, ONEMKL_UPLO_LOWER } onemklUplo; typedef enum { ONEMKL_DIAG_NONUNIT, ONEMKL_DIAG_UNIT } onemklDiag; typedef enum { ONEMKL_SIDE_LEFT, ONEMKL_SIDE_RIGHT } onemklSide; typedef enum { ONEMKL_OFFSET_ROW, ONEMKL_OFFSET_COL, ONEMKL_OFFSET_FIX, } onemklOffset; // LAPACK types typedef enum { ONEMKL_JOB_N, ONEMKL_JOB_V, ONEMKL_JOB_U, ONEMKL_JOB_A, ONEMKL_JOB_S, ONEMKL_JOB_O } onemklJob; typedef enum { ONEMKL_GENERATE_Q, ONEMKL_GENERATE_P, ONEMKL_GENERATE_N, ONEMKL_GENERATE_V } onemklGenerate; typedef enum { ONEMKL_COMPZ_N, ONEMKL_COMPZ_V, ONEMKL_COMPZ_I } onemklCompz; typedef enum { ONEMKL_DIRECT_F, ONEMKL_DIRECT_B } onemklDirect; typedef enum { ONEMKL_STOREV_C, ONEMKL_STOREV_R } onemklStorev; typedef enum { ONEMKL_RANGEV_A, ONEMKL_RANGEV_V, ONEMKL_RANGEV_I } onemklRangev; typedef enum { ONEMKL_ORDER_B, ONEMKL_ORDER_E } onemklOrder; typedef enum { ONEMKL_JOBSVD_N, ONEMKL_JOBSVD_A, ONEMKL_JOBSVD_O, ONEMKL_JOBSVD_S } onemklJobsvd; typedef enum { ONEMKL_LAYOUT_ROW, ONEMKL_LAYOUT_COL, } onemklLayout; typedef enum { ONEMKL_INDEX_ZERO, ONEMKL_INDEX_ONE, } onemklIndex; // SPARSE types typedef enum { ONEMKL_PROPERTY_SYMMETRIC, ONEMKL_PROPERTY_SORTED, } onemklProperty; typedef enum { ONEMKL_MATRIX_VIEW_GENERAL, } onemklMatrixView; typedef enum { ONEMKL_MATMAT_REQUEST_GET_WORK_ESTIMATION_BUF_SIZE, ONEMKL_MATMAT_REQUEST_WORK_ESTIMATION, ONEMKL_MATMAT_REQUEST_GET_COMPUTE_STRUCTURE_BUF_SIZE, ONEMKL_MATMAT_REQUEST_COMPUTE_STRUCTURE, ONEMKL_MATMAT_REQUEST_FINALIZE_STRUCTURE, ONEMKL_MATMAT_REQUEST_GET_COMPUTE_BUF_SIZE, ONEMKL_MATMAT_REQUEST_COMPUTE, 
ONEMKL_MATMAT_REQUEST_GET_NNZ,
    ONEMKL_MATMAT_REQUEST_FINALIZE,
} onemklMatmatRequest;

// Algorithm selectors for out-of-place sparse matrix conversion/addition.
// Each currently exposes only the default algorithm.
typedef enum {
    ONEMKL_OMATCONVERT_DEFAULT_ALG,
} onemklOmatconvertAlg;

typedef enum {
    ONEMKL_OMATADD_DEFAULT_ALG,
} onemklOmataddAlg;

// Opaque handle types for oneMKL sparse objects. The structs are only ever
// defined on the C++ side; C callers pass the pointers around untouched.
struct matrix_handle;
typedef struct matrix_handle *matrix_handle_t;
struct matmat_descr;
typedef struct matmat_descr *matmat_descr_t;
struct omatconvert_descr;
typedef struct omatconvert_descr *omatconvert_descr_t;
struct omatadd_descr;
typedef struct omatadd_descr *omatadd_descr_t;

// Version of the oneMKL release this wrapper was generated against.
// NOTE(review): file-scope `const` object definitions in a header have
// external linkage in C and can produce duplicate-symbol errors if this
// header is included from more than one translation unit — confirm the
// intended usage (single consumer) before relying on that.
const int64_t ONEMKL_VERSION_MAJOR = 2025;
const int64_t ONEMKL_VERSION_MINOR = 2;
const int64_t ONEMKL_VERSION_PATCH = 0;

// Writes the wrapper's oneMKL version triple into the three out-pointers.
void onemkl_version(int64_t *major, int64_t *minor, int64_t *patch);

// ---------------------------------------------------------------------------
// Grouped batched BLAS (array-of-pointers interface).
// Per-group sizes/leading dimensions are passed as arrays of length
// `group_count`; `group_size` gives the number of problems in each group.
// All functions return an int status code and enqueue work on `device_queue`.
// NOTE(review): the half-precision batch entry takes `uint16_t *alpha/beta`
// while the non-batched onemklHgemm below takes `short *` — presumably both
// are raw Float16 bit patterns; confirm against the generated .cpp.
// ---------------------------------------------------------------------------
int onemklHgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, uint16_t *alpha, const short **a, int64_t *lda, const short **b, int64_t *ldb, uint16_t *beta, short **c, int64_t *ldc, int64_t group_count, int64_t *group_size);
int onemklSgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, float *alpha, const float **a, int64_t *lda, const float **b, int64_t *ldb, float *beta, float **c, int64_t *ldc, int64_t group_count, int64_t *group_size);
int onemklDgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, double *alpha, const double **a, int64_t *lda, const double **b, int64_t *ldb, double *beta, double **c, int64_t *ldc, int64_t group_count, int64_t *group_size);
int onemklCgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, float _Complex *alpha, const float _Complex **a, int64_t *lda, const float _Complex **b, int64_t *ldb, float _Complex *beta, float _Complex **c, int64_t *ldc, int64_t group_count, int64_t *group_size);
int onemklZgemm_batch(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t *m, int64_t *n, int64_t *k, double _Complex *alpha, const double _Complex **a, int64_t *lda, const double _Complex **b, int64_t *ldb, double _Complex *beta, double _Complex **c, int64_t *ldc, int64_t group_count, int64_t *group_size);

// Grouped batched triangular solve (S/D/C/Z).
int onemklStrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose transa, onemklDiag unit_diag, int64_t *m, int64_t *n, float *alpha, const float **a, int64_t *lda, float **b, int64_t *ldb, int64_t group_count, int64_t *group_size);
int onemklDtrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose transa, onemklDiag unit_diag, int64_t *m, int64_t *n, double *alpha, const double **a, int64_t *lda, double **b, int64_t *ldb, int64_t group_count, int64_t *group_size);
int onemklCtrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose transa, onemklDiag unit_diag, int64_t *m, int64_t *n, float _Complex *alpha, const float _Complex **a, int64_t *lda, float _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_size);
int onemklZtrsm_batch(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose transa, onemklDiag unit_diag, int64_t *m, int64_t *n, double _Complex *alpha, const double _Complex **a, int64_t *lda, double _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_size);

// ---------------------------------------------------------------------------
// BLAS Level 3: matrix-matrix operations.
// Naming follows classic BLAS prefixes: H = half, S = float, D = double,
// C = float complex, Z = double complex. Scalars are passed by pointer.
// ---------------------------------------------------------------------------

// General matrix multiply: C = alpha*op(A)*op(B) + beta*C.
int onemklHgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, short *alpha, short *a, int64_t lda, short *b, int64_t ldb, short *beta, short *c, int64_t ldc);
int onemklSgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, float *alpha, float *a, int64_t lda, float *b, int64_t ldb, float *beta, float *c, int64_t ldc);
int onemklDgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, double *alpha, double *a, int64_t lda, double *b, int64_t ldb, double *beta, double *c, int64_t ldc);
int onemklCgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *beta, float _Complex *c, int64_t ldc);
int onemklZgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *beta, double _Complex *c, int64_t ldc);

// Symmetric (symm) and Hermitian (hemm) matrix multiply.
int onemklSsymm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *b, int64_t ldb, float *beta, float *c, int64_t ldc);
int onemklDsymm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *b, int64_t ldb, double *beta, double *c, int64_t ldc);
int onemklCsymm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *beta, float _Complex *c, int64_t ldc);
int onemklZsymm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *beta, double _Complex *c, int64_t ldc);
int onemklChemm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *beta, float _Complex *c, int64_t ldc);
int onemklZhemm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *beta, double _Complex *c, int64_t ldc);

// Symmetric rank-k update; Hermitian rank-k (herk) takes real alpha/beta.
int onemklSsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float *alpha, float *a, int64_t lda, float *beta, float *c, int64_t ldc);
int onemklDsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double *alpha, double *a, int64_t lda, double *beta, double *c, int64_t ldc);
int onemklCsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *beta, float _Complex *c, int64_t ldc);
int onemklZsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *beta, double _Complex *c, int64_t ldc);
int onemklCherk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float *alpha, float _Complex *a, int64_t lda, float *beta, float _Complex *c, int64_t ldc);
int onemklZherk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double *alpha, double _Complex *a, int64_t lda, double *beta, double _Complex *c, int64_t ldc);

// Symmetric rank-2k update; Hermitian rank-2k (her2k) takes real beta.
int onemklSsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float *alpha, float *a, int64_t lda, float *b, int64_t ldb, float *beta, float *c, int64_t ldc);
int onemklDsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double *alpha, double *a, int64_t lda, double *b, int64_t ldb, double *beta, double *c, int64_t ldc);
int onemklCsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *beta, float _Complex *c, int64_t ldc);
int onemklZsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *beta, double _Complex *c, int64_t ldc);
int onemklCher2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float *beta, float _Complex *c, int64_t ldc);
int onemklZher2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double *beta, double _Complex *c, int64_t ldc);

// Triangular matrix multiply (in-place on B); the `_variant` form takes an
// extra beta/C operand.
int onemklStrmm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *b, int64_t ldb);
int onemklDtrmm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *b, int64_t ldb);
int onemklCtrmm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb);
int onemklZtrmm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb);
int onemklStrmm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *b, int64_t ldb, float *beta, float *c, int64_t ldc);
int onemklDtrmm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *b, int64_t ldb, double *beta, double *c, int64_t ldc);
int onemklCtrmm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *beta, float _Complex *c, int64_t ldc);
int onemklZtrmm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *beta, double _Complex *c, int64_t ldc);

// Triangular solve (in-place on B); `_variant` adds beta/C as above.
int onemklStrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *b, int64_t ldb);
int onemklDtrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *b, int64_t ldb);
int onemklCtrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb);
int onemklZtrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb);
int onemklStrsm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *b, int64_t ldb, float *beta, float *c, int64_t ldc);
int onemklDtrsm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *b, int64_t ldb, double *beta, double *c, int64_t ldc);
int onemklCtrsm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *beta, float _Complex *c, int64_t ldc);
int onemklZtrsm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *beta, double _Complex *c, int64_t ldc);

// Diagonal matrix multiply: C = A * diag(x) or diag(x) * A.
int onemklSdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, float *a, int64_t lda, float *x, int64_t incx, float *c, int64_t ldc);
int onemklDdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, double *a, int64_t lda, double *x, int64_t incx, double *c, int64_t ldc);
int onemklCdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex *c, int64_t ldc);
int onemklZdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex *c, int64_t ldc);

// ---------------------------------------------------------------------------
// BLAS Level 2: matrix-vector operations.
// ---------------------------------------------------------------------------

// General matrix-vector multiply: y = alpha*op(A)*x + beta*y.
int onemklSgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *x, int64_t incx, float *beta, float *y, int64_t incy);
int onemklDgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *x, int64_t incx, double *beta, double *y, int64_t incy);
int onemklCgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex *beta, float _Complex *y, int64_t incy);
int onemklZgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex *beta, double _Complex *y, int64_t incy);

// General banded matrix-vector multiply (kl/ku = sub/super-diagonals).
int onemklSgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, float *alpha, float *a, int64_t lda, float *x, int64_t incx, float *beta, float *y, int64_t incy);
int onemklDgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, double *alpha, double *a, int64_t lda, double *x, int64_t incx, double *beta, double *y, int64_t incy);
int onemklCgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex *beta, float _Complex *y, int64_t incy);
int onemklZgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex *beta, double _Complex *y, int64_t incy);

// Rank-1 updates: ger (real), gerc (conjugated), geru (unconjugated).
int onemklSger(syclQueue_t device_queue, int64_t m, int64_t n, float *alpha, float *x, int64_t incx, float *y, int64_t incy, float *a, int64_t lda);
int onemklDger(syclQueue_t device_queue, int64_t m, int64_t n, double *alpha, double *x, int64_t incx, double *y, int64_t incy, double *a, int64_t lda);
int onemklCgerc(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a, int64_t lda);
int onemklZgerc(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a, int64_t lda);
int onemklCgeru(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a, int64_t lda);
int onemklZgeru(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a, int64_t lda);

// Hermitian banded (hbmv) / full (hemv) matrix-vector multiply.
int onemklChbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, int64_t k, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex *beta, float _Complex *y, int64_t incy);
int onemklZhbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, int64_t k, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex *beta, double _Complex *y, int64_t incy);
int onemklChemv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex *beta, float _Complex *y, int64_t incy);
int onemklZhemv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex *beta, double _Complex *y, int64_t incy);

// Hermitian rank-1 (her, real alpha) and rank-2 (her2) updates.
int onemklCher(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float _Complex *x, int64_t incx, float _Complex *a, int64_t lda);
int onemklZher(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double _Complex *x, int64_t incx, double _Complex *a, int64_t lda);
int onemklCher2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a, int64_t lda);
int onemklZher2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a, int64_t lda);

// Hermitian packed-storage variants (no lda: `a` is packed).
int onemklChpmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex *alpha, float _Complex *a, float _Complex *x, int64_t incx, float _Complex *beta, float _Complex *y, int64_t incy);
int onemklZhpmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex *alpha, double _Complex *a, double _Complex *x, int64_t incx, double _Complex *beta, double _Complex *y, int64_t incy);
int onemklChpr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float _Complex *x, int64_t incx, float _Complex *a);
int onemklZhpr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double _Complex *x, int64_t incx, double _Complex *a);
int onemklChpr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a);
int onemklZhpr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a);

// Symmetric banded (sbmv) / full (symv) matrix-vector multiply.
int onemklSsbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, int64_t k, float *alpha, float *a, int64_t lda, float *x, int64_t incx, float *beta, float *y, int64_t incy);
int onemklDsbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, int64_t k, double *alpha, double *a, int64_t lda, double *x, int64_t incx, double *beta, double *y, int64_t incy);
int onemklSsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float *a, int64_t lda, float *x, int64_t incx, float *beta, float *y, int64_t incy);
int onemklDsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double *a, int64_t lda, double *x, int64_t incx, double *beta, double *y, int64_t incy);
int onemklCsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex *beta, float _Complex *y, int64_t incy);
int onemklZsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex *beta, double _Complex *y, int64_t incy);

// Symmetric rank-1 (syr) and rank-2 (syr2) updates.
int onemklSsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float *x, int64_t incx, float *a, int64_t lda);
int onemklDsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double *x, int64_t incx, double *a, int64_t lda);
int onemklCsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *a, int64_t lda);
int onemklZsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *a, int64_t lda);
int onemklSsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float *x, int64_t incx, float *y, int64_t incy, float *a, int64_t lda);
int onemklDsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double *x, int64_t incx, double *y, int64_t incy, double *a, int64_t lda);
int onemklCsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a, int64_t lda);
int onemklZsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a, int64_t lda);

// Symmetric packed-storage variants (real only; `a` is packed, no lda).
int onemklSspmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float *a, float *x, int64_t incx, float *beta, float *y, int64_t incy);
int onemklDspmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double *a, double *x, int64_t incx, double *beta, double *y, int64_t incy);
int onemklSspr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float *x, int64_t incx, float *a);
int onemklDspr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double *x, int64_t incx, double *a);
int onemklSspr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float *x, int64_t incx, float *y, int64_t incy, float *a);
int onemklDspr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double *x, int64_t incx, double *y, int64_t incy, double *a);

// Triangular banded matrix-vector multiply (tbmv) and solve (tbsv).
int onemklStbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, float *a, int64_t lda, float *x, int64_t incx);
int onemklDtbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, double *a, int64_t lda, double *x, int64_t incx);
int onemklCtbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx);
int onemklZtbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx);
int onemklStbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, float *a, int64_t lda, float *x, int64_t incx);
int onemklDtbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, double *a, int64_t lda, double *x, int64_t incx);
int onemklCtbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx);
int onemklZtbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx);

// Triangular packed matrix-vector multiply (tpmv) and solve (tpsv).
int onemklStpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float *a, float *x, int64_t incx);
int onemklDtpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double *a, double *x, int64_t incx);
int onemklCtpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float _Complex *a, float _Complex *x, int64_t incx);
int onemklZtpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double _Complex *a, double _Complex *x, int64_t incx);
int onemklStpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float *a, float *x, int64_t incx);
int onemklDtpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double *a, double *x, int64_t incx);
int onemklCtpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float _Complex *a, float _Complex *x, int64_t incx);
int onemklZtpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double _Complex *a, double _Complex *x, int64_t incx);

// Triangular matrix-vector multiply (trmv) and solve (trsv).
int onemklStrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float *a, int64_t lda, float *x, int64_t incx);
int onemklDtrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double *a, int64_t lda, double *x, int64_t incx);
int onemklCtrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx);
int onemklZtrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx);
int onemklStrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float *a, int64_t lda, float *x, int64_t incx);
int onemklDtrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double *a, int64_t lda, double *x, int64_t incx);
int onemklCtrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx);
int onemklZtrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx);

// ---------------------------------------------------------------------------
// BLAS Level 1: vector operations. Results are written through out-pointers.
// ---------------------------------------------------------------------------

// Complex dot products: dotc conjugates x, dotu does not.
int onemklCdotc(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *result);
int onemklZdotc(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *result);
int onemklCdotu(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *result);
int onemklZdotu(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *result);

// Index of max/min absolute value; `_64` variants return a 64-bit index,
// `base` selects 0- or 1-based indexing.
int onemklSiamax(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, int32_t *result, onemklIndex base);
int onemklSiamax_64(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, int64_t *result, onemklIndex base);
int onemklDiamax(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, int32_t *result, onemklIndex base);
int onemklDiamax_64(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, int64_t *result, onemklIndex base);
int onemklCiamax(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, int32_t *result, onemklIndex base);
int onemklCiamax_64(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, int64_t *result, onemklIndex base);
int onemklZiamax(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, int32_t *result, onemklIndex base);
int onemklZiamax_64(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, int64_t *result, onemklIndex base);
int onemklSiamin(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, int32_t *result, onemklIndex base);
int onemklSiamin_64(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, int64_t *result, onemklIndex base);
int onemklDiamin(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, int32_t *result, onemklIndex base);
int onemklDiamin_64(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, int64_t *result, onemklIndex base);
int onemklCiamin(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, int32_t *result, onemklIndex base);
int onemklCiamin_64(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, int64_t *result, onemklIndex base);
int onemklZiamin(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, int32_t *result, onemklIndex base);
int onemklZiamin_64(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, int64_t *result, onemklIndex base);

// Sum of absolute values (complex variants return the real sum of moduli).
int onemklSasum(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *result);
int onemklDasum(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *result);
int onemklCasum(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float *result);
int onemklZasum(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double *result);

// y = alpha*x + y (axpy) and y = alpha*x + beta*y (axpby).
int onemklHaxpy(syclQueue_t device_queue, int64_t n, short *alpha, short *x, int64_t incx, short *y, int64_t incy);
int onemklSaxpy(syclQueue_t device_queue, int64_t n, float *alpha, float *x, int64_t incx, float *y, int64_t incy);
int onemklDaxpy(syclQueue_t device_queue, int64_t n, double *alpha, double *x, int64_t incx, double *y, int64_t incy);
int onemklCaxpy(syclQueue_t device_queue, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy);
int onemklZaxpy(syclQueue_t device_queue, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy);
int onemklSaxpby(syclQueue_t device_queue, int64_t n, float *alpha, float *x, int64_t incx, float *beta, float *y, int64_t incy);
int onemklDaxpby(syclQueue_t device_queue, int64_t n, double *alpha, double *x, int64_t incx, double *beta, double *y, int64_t incy);
int onemklCaxpby(syclQueue_t device_queue, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *beta, float _Complex *y, int64_t incy);
int onemklZaxpby(syclQueue_t device_queue, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *beta, double _Complex *y, int64_t incy);

// Vector copy: y = x.
int onemklScopy(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy);
int onemklDcopy(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy);
int onemklCcopy(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy);
int onemklZcopy(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy);

// Real dot products; sdsdot accumulates in double and adds scalar *sb.
int onemklHdot(syclQueue_t device_queue, int64_t n, short *x, int64_t incx, short *y, int64_t incy, short *result);
int onemklSdot(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, float *result);
int onemklDdot(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, double *result);
int onemklSsdsdot(syclQueue_t device_queue, int64_t n, float *sb, float *x, int64_t incx, float *y, int64_t incy, float *result);

// Euclidean norm; complex variants return a real result.
int onemklHnrm2(syclQueue_t device_queue, int64_t n, short *x, int64_t incx, short *result);
int onemklSnrm2(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *result);
int onemklDnrm2(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *result);
int onemklCnrm2(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float *result);
int onemklZnrm2(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double *result);

// Givens rotations: apply (rot), generate (rotg), modified apply/generate
// (rotm/rotmg). CSrot/ZDrot take real c and s; Crot/Zrot take complex s.
int onemklHrot(syclQueue_t device_queue, int64_t n, short *x, int64_t incx, short *y, int64_t incy, short *c, short *s);
int onemklSrot(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, float *c, float *s);
int onemklDrot(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, double *c, double *s);
int onemklCSrot(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float *c, float *s);
int onemklCrot(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float *c, float _Complex *s);
int onemklZDrot(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double *c, double *s);
int onemklZrot(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double *c, double _Complex *s);
int onemklSrotg(syclQueue_t device_queue, float *a, float *b, float *c, float *s);
int onemklDrotg(syclQueue_t device_queue, double *a, double *b, double *c, double *s);
int onemklCrotg(syclQueue_t device_queue, float _Complex *a, float _Complex *b, float *c, float _Complex *s);
int onemklZrotg(syclQueue_t device_queue, double _Complex *a, double _Complex *b, double *c, double _Complex *s);
int onemklSrotm(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, float *param);
int onemklDrotm(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, double *param);
int onemklSrotmg(syclQueue_t device_queue, float *d1, float *d2, float *x1, float *y1, float *param);
int onemklDrotmg(syclQueue_t device_queue, double *d1, double *d2, double *x1, double *y1, double *param);

// Scaling: x = alpha*x. CSscal/ZDscal scale a complex vector by a real alpha.
int onemklHscal(syclQueue_t device_queue, int64_t n, short *alpha, short *x, int64_t incx);
int onemklSscal(syclQueue_t device_queue, int64_t n, float *alpha, float *x, int64_t incx);
int onemklDscal(syclQueue_t device_queue, int64_t n, double *alpha, double *x, int64_t incx);
int onemklCSscal(syclQueue_t device_queue, int64_t n, float *alpha, float _Complex *x, int64_t incx);
int onemklZDscal(syclQueue_t device_queue, int64_t n, double *alpha, double _Complex *x, int64_t incx);
int onemklCscal(syclQueue_t device_queue, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx);
int onemklZscal(syclQueue_t device_queue, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx);

// Vector swap: x <-> y.
int onemklSswap(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy);
int onemklDswap(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy);
int onemklCswap(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy);
int onemklZswap(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy);

// ---------------------------------------------------------------------------
// Strided batched BLAS: one contiguous allocation per operand, consecutive
// problems separated by stride_a/stride_b/stride_c elements.
// ---------------------------------------------------------------------------
int onemklHgemm_batch_strided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, short *alpha, short *a, int64_t lda, int64_t stride_a, short *b, int64_t ldb, int64_t stride_b, short *beta, short *c, int64_t ldc, int64_t stride_c, int64_t batch_size);
int onemklSgemm_batch_strided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, float *alpha, float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, float *beta, float *c, int64_t ldc, int64_t stride_c, int64_t batch_size);
int onemklDgemm_batch_strided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, double *alpha, double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, double *beta, double *c, int64_t ldc, int64_t stride_c, int64_t batch_size);
int onemklCgemm_batch_strided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, float _Complex *alpha, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *b, int64_t ldb, int64_t stride_b, float _Complex *beta, float _Complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size);
int onemklZgemm_batch_strided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, double _Complex *alpha, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *b, int64_t ldb, int64_t stride_b, double _Complex *beta, double _Complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size);
int onemklSsyrk_batch_strided(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float *alpha, float *a, int64_t lda, int64_t stride_a, float *beta, float *c, int64_t ldc, int64_t stride_c, int64_t batch_size);
int onemklDsyrk_batch_strided(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double *alpha, double *a, int64_t lda, int64_t stride_a, double *beta, double *c, int64_t ldc, int64_t stride_c, int64_t batch_size);
int onemklCsyrk_batch_strided(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float _Complex *alpha, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *beta, float _Complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size);
int onemklZsyrk_batch_strided(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n,
int64_t k, double _Complex *alpha, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *beta, double _Complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size); int onemklStrsm_batch_strided(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, int64_t batch_size); int onemklDtrsm_batch_strided(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, int64_t batch_size); int onemklCtrsm_batch_strided(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size); int onemklZtrsm_batch_strided(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size); int onemklSgemv_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, int64_t stridea, float *x, int64_t incx, int64_t stridex, float *beta, float *y, int64_t incy, int64_t stridey, int64_t batch_size); int onemklDgemv_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, int64_t stridea, double *x, int64_t incx, int64_t stridex, double *beta, double *y, int64_t incy, int64_t stridey, int64_t batch_size); int onemklCgemv_batch_strided(syclQueue_t device_queue, 
onemklTranspose trans, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, int64_t stridea, float _Complex *x, int64_t incx, int64_t stridex, float _Complex *beta, float _Complex *y, int64_t incy, int64_t stridey, int64_t batch_size); int onemklZgemv_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, int64_t stridea, double _Complex *x, int64_t incx, int64_t stridex, double _Complex *beta, double _Complex *y, int64_t incy, int64_t stridey, int64_t batch_size); int onemklSdgmm_batch_strided(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, float *a, int64_t lda, int64_t stridea, float *x, int64_t incx, int64_t stridex, float *c, int64_t ldc, int64_t stridec, int64_t batch_size); int onemklDdgmm_batch_strided(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, double *a, int64_t lda, int64_t stridea, double *x, int64_t incx, int64_t stridex, double *c, int64_t ldc, int64_t stridec, int64_t batch_size); int onemklCdgmm_batch_strided(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, float _Complex *a, int64_t lda, int64_t stridea, float _Complex *x, int64_t incx, int64_t stridex, float _Complex *c, int64_t ldc, int64_t stridec, int64_t batch_size); int onemklZdgmm_batch_strided(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, double _Complex *a, int64_t lda, int64_t stridea, double _Complex *x, int64_t incx, int64_t stridex, double _Complex *c, int64_t ldc, int64_t stridec, int64_t batch_size); int onemklSaxpy_batch_strided(syclQueue_t device_queue, int64_t n, float *alpha, float *x, int64_t incx, int64_t stridex, float *y, int64_t incy, int64_t stridey, int64_t batch_size); int onemklDaxpy_batch_strided(syclQueue_t device_queue, int64_t n, double *alpha, double *x, int64_t incx, int64_t stridex, double *y, int64_t incy, int64_t stridey, int64_t batch_size); int 
onemklCaxpy_batch_strided(syclQueue_t device_queue, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, int64_t stridex, float _Complex *y, int64_t incy, int64_t stridey, int64_t batch_size); int onemklZaxpy_batch_strided(syclQueue_t device_queue, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, int64_t stridex, double _Complex *y, int64_t incy, int64_t stridey, int64_t batch_size); int onemklScopy_batch_strided(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, int64_t stridex, float *y, int64_t incy, int64_t stridey, int64_t batch_size); int onemklDcopy_batch_strided(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, int64_t stridex, double *y, int64_t incy, int64_t stridey, int64_t batch_size); int onemklCcopy_batch_strided(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, int64_t stridex, float _Complex *y, int64_t incy, int64_t stridey, int64_t batch_size); int onemklZcopy_batch_strided(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, int64_t stridex, double _Complex *y, int64_t incy, int64_t stridey, int64_t batch_size); int onemklSgemmt(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose transa, onemklTranspose transb, int64_t n, int64_t k, float *alpha, float *a, int64_t lda, float *b, int64_t ldb, float *beta, float *c, int64_t ldc); int onemklDgemmt(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose transa, onemklTranspose transb, int64_t n, int64_t k, double *alpha, double *a, int64_t lda, double *b, int64_t ldb, double *beta, double *c, int64_t ldc); int onemklCgemmt(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose transa, onemklTranspose transb, int64_t n, int64_t k, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *beta, float _Complex *c, int64_t ldc); int onemklZgemmt(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose transa, onemklTranspose 
transb, int64_t n, int64_t k, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *beta, double _Complex *c, int64_t ldc); int onemklSimatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float *alpha, float *ab, int64_t lda, int64_t ldb); int onemklDimatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double *alpha, double *ab, int64_t lda, int64_t ldb); int onemklCimatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float _Complex *alpha, float _Complex *ab, int64_t lda, int64_t ldb); int onemklZimatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double _Complex *alpha, double _Complex *ab, int64_t lda, int64_t ldb); int onemklSomatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *b, int64_t ldb); int onemklDomatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *b, int64_t ldb); int onemklComatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb); int onemklZomatcopy(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb); int onemklSomatadd(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *beta, float *b, int64_t ldb, float *c, int64_t ldc); int onemklDomatadd(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *beta, double *b, int64_t ldb, double *c, int64_t ldc); int onemklComatadd(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t 
n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *beta, float _Complex *b, int64_t ldb, float _Complex *c, int64_t ldc); int onemklZomatadd(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *beta, double _Complex *b, int64_t ldb, double _Complex *c, int64_t ldc); int onemklSimatcopy_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float *alpha, float *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size); int onemklDimatcopy_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double *alpha, double *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size); int onemklCimatcopy_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float _Complex *alpha, float _Complex *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size); int onemklZimatcopy_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double _Complex *alpha, double _Complex *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size); int onemklSomatcopy_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, int64_t batch_size); int onemklDomatcopy_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, int64_t batch_size); int onemklComatcopy_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size); int onemklZomatcopy_batch_strided(syclQueue_t device_queue, onemklTranspose trans, 
int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size); int onemklSomatadd_batch_strided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, int64_t stride_a, float *beta, float *b, int64_t ldb, int64_t stride_b, float *c, int64_t ldc, int64_t stride_c, int64_t batch_size); int onemklDomatadd_batch_strided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, int64_t stride_a, double *beta, double *b, int64_t ldb, int64_t stride_b, double *c, int64_t ldc, int64_t stride_c, int64_t batch_size); int onemklComatadd_batch_strided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *beta, float _Complex *b, int64_t ldb, int64_t stride_b, float _Complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size); int onemklZomatadd_batch_strided(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *beta, double _Complex *b, int64_t ldb, int64_t stride_b, double _Complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size); // LAPACK int onemklSpotrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *scratchpad, int64_t scratchpad_size); int onemklDpotrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *scratchpad, int64_t scratchpad_size); int onemklCpotrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZpotrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double 
_Complex *a, int64_t lda, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSpotrs(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, float *a, int64_t lda, float *b, int64_t ldb, float *scratchpad, int64_t scratchpad_size); int onemklDpotrs(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, double *a, int64_t lda, double *b, int64_t ldb, double *scratchpad, int64_t scratchpad_size); int onemklCpotrs(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZpotrs(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSpotri(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *scratchpad, int64_t scratchpad_size); int onemklDpotri(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *scratchpad, int64_t scratchpad_size); int onemklCpotri(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZpotri(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double _Complex *scratchpad, int64_t scratchpad_size); int onemklStrtri(syclQueue_t device_queue, onemklUplo uplo, onemklDiag diag, int64_t n, float *a, int64_t lda, float *scratchpad, int64_t scratchpad_size); int onemklDtrtri(syclQueue_t device_queue, onemklUplo uplo, onemklDiag diag, int64_t n, double *a, int64_t lda, double *scratchpad, int64_t scratchpad_size); int onemklCtrtri(syclQueue_t device_queue, onemklUplo uplo, onemklDiag diag, int64_t n, float _Complex *a, int64_t lda, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZtrtri(syclQueue_t device_queue, onemklUplo uplo, onemklDiag 
diag, int64_t n, double _Complex *a, int64_t lda, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSgesv(syclQueue_t device_queue, int64_t n, int64_t nrhs, float *a, int64_t lda, int64_t *ipiv, float *b, int64_t ldb, float *scratchpad, int64_t scratchpad_size); int onemklDgesv(syclQueue_t device_queue, int64_t n, int64_t nrhs, double *a, int64_t lda, int64_t *ipiv, double *b, int64_t ldb, double *scratchpad, int64_t scratchpad_size); int onemklCgesv(syclQueue_t device_queue, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, int64_t *ipiv, float _Complex *b, int64_t ldb, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZgesv(syclQueue_t device_queue, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, int64_t *ipiv, double _Complex *b, int64_t ldb, double _Complex *scratchpad, int64_t scratchpad_size); int onemklCgebrd(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, float *d, float *e, float _Complex *tauq, float _Complex *taup, float _Complex *scratchpad, int64_t scratchpad_size); int onemklDgebrd(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, double *d, double *e, double *tauq, double *taup, double *scratchpad, int64_t scratchpad_size); int onemklSgebrd(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, float *d, float *e, float *tauq, float *taup, float *scratchpad, int64_t scratchpad_size); int onemklZgebrd(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, double *d, double *e, double _Complex *tauq, double _Complex *taup, double _Complex *scratchpad, int64_t scratchpad_size); int onemklCgeqrf(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *scratchpad, int64_t scratchpad_size); int onemklDgeqrf(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, double *tau, double *scratchpad, int64_t scratchpad_size); int 
onemklSgeqrf(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, float *tau, float *scratchpad, int64_t scratchpad_size); int onemklZgeqrf(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *scratchpad, int64_t scratchpad_size); int onemklCgesvd(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, int64_t n, float _Complex *a, int64_t lda, float *s, float _Complex *u, int64_t ldu, float _Complex *vt, int64_t ldvt, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZgesvd(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, int64_t n, double _Complex *a, int64_t lda, double *s, double _Complex *u, int64_t ldu, double _Complex *vt, int64_t ldvt, double _Complex *scratchpad, int64_t scratchpad_size); int onemklDgesvd(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, int64_t n, double *a, int64_t lda, double *s, double *u, int64_t ldu, double *vt, int64_t ldvt, double *scratchpad, int64_t scratchpad_size); int onemklSgesvd(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, int64_t n, float *a, int64_t lda, float *s, float *u, int64_t ldu, float *vt, int64_t ldvt, float *scratchpad, int64_t scratchpad_size); int onemklCgesvda_batch_strided(syclQueue_t device_queue, int64_t *iparm, int64_t *irank, int64_t m, int64_t n, float _Complex *a, int64_t lda, int64_t stride_a, float *s, int64_t stride_s, float _Complex *u, int64_t ldu, int64_t stride_u, float _Complex *vt, int64_t ldvt, int64_t stride_vt, float *tolerance, float *residual, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size); int onemklDgesvda_batch_strided(syclQueue_t device_queue, int64_t *iparm, int64_t *irank, int64_t m, int64_t n, double *a, int64_t lda, int64_t stride_a, double *s, int64_t stride_s, double *u, int64_t ldu, int64_t stride_u, double *vt, int64_t ldvt, int64_t stride_vt, double 
*tolerance, double *residual, int64_t batch_size, double *scratchpad, int64_t scratchpad_size); int onemklSgesvda_batch_strided(syclQueue_t device_queue, int64_t *iparm, int64_t *irank, int64_t m, int64_t n, float *a, int64_t lda, int64_t stride_a, float *s, int64_t stride_s, float *u, int64_t ldu, int64_t stride_u, float *vt, int64_t ldvt, int64_t stride_vt, float *tolerance, float *residual, int64_t batch_size, float *scratchpad, int64_t scratchpad_size); int onemklZgesvda_batch_strided(syclQueue_t device_queue, int64_t *iparm, int64_t *irank, int64_t m, int64_t n, double _Complex *a, int64_t lda, int64_t stride_a, double *s, int64_t stride_s, double _Complex *u, int64_t ldu, int64_t stride_u, double _Complex *vt, int64_t ldvt, int64_t stride_vt, double *tolerance, double *residual, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size); int onemklCgetrf(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, int64_t *ipiv, float _Complex *scratchpad, int64_t scratchpad_size); int onemklDgetrf(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, int64_t *ipiv, double *scratchpad, int64_t scratchpad_size); int onemklSgetrf(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, int64_t *ipiv, float *scratchpad, int64_t scratchpad_size); int onemklZgetrf(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, int64_t *ipiv, double _Complex *scratchpad, int64_t scratchpad_size); int onemklCgetrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, float _Complex **a, int64_t *lda, int64_t **ipiv, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size); int onemklDgetrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, double **a, int64_t *lda, int64_t **ipiv, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size); int onemklSgetrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, 
float **a, int64_t *lda, int64_t **ipiv, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size); int onemklZgetrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, double _Complex **a, int64_t *lda, int64_t **ipiv, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size); int onemklCgetrf_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size); int onemklDgetrf_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, double *scratchpad, int64_t scratchpad_size); int onemklSgetrf_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, float *scratchpad, int64_t scratchpad_size); int onemklZgetrf_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size); int onemklCgetrfnp(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, float _Complex *scratchpad, int64_t scratchpad_size); int onemklDgetrfnp(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, double *scratchpad, int64_t scratchpad_size); int onemklSgetrfnp(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, float *scratchpad, int64_t scratchpad_size); int onemklZgetrfnp(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, double _Complex *scratchpad, int64_t scratchpad_size); int onemklCgetrfnp_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, float _Complex **a, int64_t *lda, int64_t group_count, int64_t 
*group_sizes, float _Complex *scratchpad, int64_t scratchpad_size); int onemklDgetrfnp_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, double **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size); int onemklSgetrfnp_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, float **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size); int onemklZgetrfnp_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, double _Complex **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size); int onemklCgetrfnp_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, int64_t stride_a, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size); int onemklDgetrfnp_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, int64_t stride_a, int64_t batch_size, double *scratchpad, int64_t scratchpad_size); int onemklSgetrfnp_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, int64_t stride_a, int64_t batch_size, float *scratchpad, int64_t scratchpad_size); int onemklZgetrfnp_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, int64_t stride_a, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size); int onemklCgetri(syclQueue_t device_queue, int64_t n, float _Complex *a, int64_t lda, int64_t *ipiv, float _Complex *scratchpad, int64_t scratchpad_size); int onemklDgetri(syclQueue_t device_queue, int64_t n, double *a, int64_t lda, int64_t *ipiv, double *scratchpad, int64_t scratchpad_size); int onemklSgetri(syclQueue_t device_queue, int64_t n, float *a, int64_t lda, int64_t *ipiv, float *scratchpad, int64_t scratchpad_size); int onemklZgetri(syclQueue_t device_queue, int64_t n, double _Complex *a, int64_t lda, int64_t *ipiv, double _Complex 
*scratchpad, int64_t scratchpad_size); int onemklCgetrs(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, int64_t *ipiv, float _Complex *b, int64_t ldb, float _Complex *scratchpad, int64_t scratchpad_size); int onemklDgetrs(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, double *a, int64_t lda, int64_t *ipiv, double *b, int64_t ldb, double *scratchpad, int64_t scratchpad_size); int onemklSgetrs(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, float *a, int64_t lda, int64_t *ipiv, float *b, int64_t ldb, float *scratchpad, int64_t scratchpad_size); int onemklZgetrs(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, int64_t *ipiv, double _Complex *b, int64_t ldb, double _Complex *scratchpad, int64_t scratchpad_size); int onemklCgetrs_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, float _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size); int onemklDgetrs_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, double *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, double *b, int64_t ldb, int64_t stride_b, int64_t batch_size, double *scratchpad, int64_t scratchpad_size); int onemklSgetrs_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, float *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, float *b, int64_t ldb, int64_t stride_b, int64_t batch_size, float *scratchpad, int64_t scratchpad_size); int onemklZgetrs_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, double _Complex *b, 
/* oneMKL LAPACK C wrapper prototypes: eigensolvers (heev*/syev*, hegv*/sygv*),
 * tridiagonal reduction (hetrd/sytrd), factorizations (hetrf/sytrf), orthogonal/
 * unitary generation and application (org*/ung*, orm*/unm*), triangular solves
 * (trtrs), least squares (gels), and grouped-batch variants. Each routine takes a
 * syclQueue_t plus a caller-provided scratchpad/scratchpad_size pair.
 * NOTE(review): this header appears auto-generated (see deps/generate_interfaces.jl);
 * prefer regenerating over hand-editing. */
int64_t ldb, int64_t stride_b, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size); int onemklCgetrsnp_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size); int onemklDgetrsnp_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, int64_t batch_size, double *scratchpad, int64_t scratchpad_size); int onemklSgetrsnp_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, int64_t batch_size, float *scratchpad, int64_t scratchpad_size); int onemklZgetrsnp_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size); int onemklCheev(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float *w, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZheev(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double *w, double _Complex *scratchpad, int64_t scratchpad_size); int onemklCheevd(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float *w, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZheevd(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double *w, double _Complex *scratchpad, int64_t scratchpad_size); int onemklCheevx(syclQueue_t device_queue, onemklCompz jobz, onemklRangev 
range, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float *vl, float *vu, int64_t il, int64_t iu, float *abstol, int64_t *m, float *w, float _Complex *z, int64_t ldz, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZheevx(syclQueue_t device_queue, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double *vl, double *vu, int64_t il, int64_t iu, double *abstol, int64_t *m, double *w, double _Complex *z, int64_t ldz, double _Complex *scratchpad, int64_t scratchpad_size); int onemklChegvd(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float *w, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZhegvd(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double *w, double _Complex *scratchpad, int64_t scratchpad_size); int onemklChegvx(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float *vl, float *vu, int64_t il, int64_t iu, float *abstol, int64_t *m, float *w, float _Complex *z, int64_t ldz, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZhegvx(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double *vl, double *vu, int64_t il, int64_t iu, double *abstol, int64_t *m, double *w, double _Complex *z, int64_t ldz, double _Complex *scratchpad, int64_t scratchpad_size); int onemklChetrd(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float *d, float *e, float _Complex *tau, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZhetrd(syclQueue_t device_queue, onemklUplo uplo, 
int64_t n, double _Complex *a, int64_t lda, double *d, double *e, double _Complex *tau, double _Complex *scratchpad, int64_t scratchpad_size); int onemklChetrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, int64_t *ipiv, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZhetrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, int64_t *ipiv, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSorgbr(syclQueue_t device_queue, onemklGenerate vec, int64_t m, int64_t n, int64_t k, float *a, int64_t lda, float *tau, float *scratchpad, int64_t scratchpad_size); int onemklDorgbr(syclQueue_t device_queue, onemklGenerate vec, int64_t m, int64_t n, int64_t k, double *a, int64_t lda, double *tau, double *scratchpad, int64_t scratchpad_size); int onemklDorgqr(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, double *a, int64_t lda, double *tau, double *scratchpad, int64_t scratchpad_size); int onemklSorgqr(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, float *a, int64_t lda, float *tau, float *scratchpad, int64_t scratchpad_size); int onemklDormqr(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, double *a, int64_t lda, double *tau, double *c, int64_t ldc, double *scratchpad, int64_t scratchpad_size); int onemklSormqr(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, float *a, int64_t lda, float *tau, float *c, int64_t ldc, float *scratchpad, int64_t scratchpad_size); int onemklCsteqr(syclQueue_t device_queue, onemklCompz compz, int64_t n, float *d, float *e, float _Complex *z, int64_t ldz, float _Complex *scratchpad, int64_t scratchpad_size); int onemklDsteqr(syclQueue_t device_queue, onemklCompz compz, int64_t n, double *d, double *e, double *z, int64_t ldz, double *scratchpad, int64_t scratchpad_size); int onemklSsteqr(syclQueue_t device_queue, onemklCompz 
compz, int64_t n, float *d, float *e, float *z, int64_t ldz, float *scratchpad, int64_t scratchpad_size); int onemklZsteqr(syclQueue_t device_queue, onemklCompz compz, int64_t n, double *d, double *e, double _Complex *z, int64_t ldz, double _Complex *scratchpad, int64_t scratchpad_size); int onemklDsyev(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *w, double *scratchpad, int64_t scratchpad_size); int onemklSsyev(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *w, float *scratchpad, int64_t scratchpad_size); int onemklDsyevd(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *w, double *scratchpad, int64_t scratchpad_size); int onemklSsyevd(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *w, float *scratchpad, int64_t scratchpad_size); int onemklDsyevx(syclQueue_t device_queue, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *vl, double *vu, int64_t il, int64_t iu, double *abstol, int64_t *m, double *w, double *z, int64_t ldz, double *scratchpad, int64_t scratchpad_size); int onemklSsyevx(syclQueue_t device_queue, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *vl, float *vu, int64_t il, int64_t iu, float *abstol, int64_t *m, float *w, float *z, int64_t ldz, float *scratchpad, int64_t scratchpad_size); int onemklDsygvd(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *b, int64_t ldb, double *w, double *scratchpad, int64_t scratchpad_size); int onemklSsygvd(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *b, int64_t ldb, float *w, float *scratchpad, int64_t scratchpad_size); int onemklDsygvx(syclQueue_t device_queue, int64_t itype, 
onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *b, int64_t ldb, double *vl, double *vu, int64_t il, int64_t iu, double *abstol, int64_t *m, double *w, double *z, int64_t ldz, double *scratchpad, int64_t scratchpad_size); int onemklSsygvx(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *b, int64_t ldb, float *vl, float *vu, int64_t il, int64_t iu, float *abstol, int64_t *m, float *w, float *z, int64_t ldz, float *scratchpad, int64_t scratchpad_size); int onemklDsytrd(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *d, double *e, double *tau, double *scratchpad, int64_t scratchpad_size); int onemklSsytrd(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *d, float *e, float *tau, float *scratchpad, int64_t scratchpad_size); int onemklCtrtrs(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *scratchpad, int64_t scratchpad_size); int onemklDtrtrs(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, double *a, int64_t lda, double *b, int64_t ldb, double *scratchpad, int64_t scratchpad_size); int onemklStrtrs(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, float *a, int64_t lda, float *b, int64_t ldb, float *scratchpad, int64_t scratchpad_size); int onemklZtrtrs(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *scratchpad, int64_t scratchpad_size); int onemklCungbr(syclQueue_t device_queue, onemklGenerate vec, int64_t m, int64_t n, int64_t k, float _Complex *a, int64_t lda, float 
_Complex *tau, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZungbr(syclQueue_t device_queue, onemklGenerate vec, int64_t m, int64_t n, int64_t k, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *scratchpad, int64_t scratchpad_size); int onemklCungqr(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZungqr(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *scratchpad, int64_t scratchpad_size); int onemklCunmqr(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *c, int64_t ldc, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZunmqr(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *c, int64_t ldc, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSgerqf(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, float *tau, float *scratchpad, int64_t scratchpad_size); int onemklDgerqf(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, double *tau, double *scratchpad, int64_t scratchpad_size); int onemklCgerqf(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZgerqf(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSormrq(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, float *a, int64_t lda, float *tau, float *c, int64_t ldc, float *scratchpad, int64_t 
scratchpad_size); int onemklDormrq(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, double *a, int64_t lda, double *tau, double *c, int64_t ldc, double *scratchpad, int64_t scratchpad_size); int onemklCunmrq(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *c, int64_t ldc, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZunmrq(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *c, int64_t ldc, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSsytrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, int64_t *ipiv, float *scratchpad, int64_t scratchpad_size); int onemklDsytrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, int64_t *ipiv, double *scratchpad, int64_t scratchpad_size); int onemklCsytrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, int64_t *ipiv, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZsytrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, int64_t *ipiv, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSorgtr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, float *tau, float *scratchpad, int64_t scratchpad_size); int onemklDorgtr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, double *tau, double *scratchpad, int64_t scratchpad_size); int onemklCungtr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZungtr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, 
double _Complex *tau, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSormtr(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, float *a, int64_t lda, float *tau, float *c, int64_t ldc, float *scratchpad, int64_t scratchpad_size); int onemklDormtr(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, double *a, int64_t lda, double *tau, double *c, int64_t ldc, double *scratchpad, int64_t scratchpad_size); int onemklCunmtr(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, float _Complex *a, int64_t lda, float _Complex *tau, float _Complex *c, int64_t ldc, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZunmtr(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, double _Complex *a, int64_t lda, double _Complex *tau, double _Complex *c, int64_t ldc, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSgels(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, float *a, int64_t lda, float *b, int64_t ldb, float *scratchpad, int64_t scratchpad_size); int onemklDgels(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, double *a, int64_t lda, double *b, int64_t ldb, double *scratchpad, int64_t scratchpad_size); int onemklCgels(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZgels(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSpotrf_batch(syclQueue_t device_queue, onemklUplo *uplo, int64_t *n, float **a, int64_t *lda, int64_t 
group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size); int onemklDpotrf_batch(syclQueue_t device_queue, onemklUplo *uplo, int64_t *n, double **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size); int onemklCpotrf_batch(syclQueue_t device_queue, onemklUplo *uplo, int64_t *n, float _Complex **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZpotrf_batch(syclQueue_t device_queue, onemklUplo *uplo, int64_t *n, double _Complex **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSpotrs_batch(syclQueue_t device_queue, onemklUplo *uplo, int64_t *n, int64_t *nrhs, float **a, int64_t *lda, float **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size); int onemklDpotrs_batch(syclQueue_t device_queue, onemklUplo *uplo, int64_t *n, int64_t *nrhs, double **a, int64_t *lda, double **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size); int onemklCpotrs_batch(syclQueue_t device_queue, onemklUplo *uplo, int64_t *n, int64_t *nrhs, float _Complex **a, int64_t *lda, float _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZpotrs_batch(syclQueue_t device_queue, onemklUplo *uplo, int64_t *n, int64_t *nrhs, double _Complex **a, int64_t *lda, double _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSgeinv_batch(syclQueue_t device_queue, int64_t *n, float **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size); int onemklDgeinv_batch(syclQueue_t device_queue, int64_t *n, double **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, double 
*scratchpad, int64_t scratchpad_size); int onemklCgeinv_batch(syclQueue_t device_queue, int64_t *n, float _Complex **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZgeinv_batch(syclQueue_t device_queue, int64_t *n, double _Complex **a, int64_t *lda, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSgetrs_batch(syclQueue_t device_queue, onemklTranspose *trans, int64_t *n, int64_t *nrhs, float **a, int64_t *lda, int64_t **ipiv, float **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size); int onemklDgetrs_batch(syclQueue_t device_queue, onemklTranspose *trans, int64_t *n, int64_t *nrhs, double **a, int64_t *lda, int64_t **ipiv, double **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size); int onemklCgetrs_batch(syclQueue_t device_queue, onemklTranspose *trans, int64_t *n, int64_t *nrhs, float _Complex **a, int64_t *lda, int64_t **ipiv, float _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZgetrs_batch(syclQueue_t device_queue, onemklTranspose *trans, int64_t *n, int64_t *nrhs, double _Complex **a, int64_t *lda, int64_t **ipiv, double _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSgetri_batch(syclQueue_t device_queue, int64_t *n, float **a, int64_t *lda, int64_t **ipiv, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size); int onemklDgetri_batch(syclQueue_t device_queue, int64_t *n, double **a, int64_t *lda, int64_t **ipiv, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size); int onemklCgetri_batch(syclQueue_t device_queue, int64_t *n, float _Complex **a, int64_t *lda, int64_t **ipiv, 
int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZgetri_batch(syclQueue_t device_queue, int64_t *n, double _Complex **a, int64_t *lda, int64_t **ipiv, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSgeqrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, float **a, int64_t *lda, float **tau, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size); int onemklDgeqrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, double **a, int64_t *lda, double **tau, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size); int onemklCgeqrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, float _Complex **a, int64_t *lda, float _Complex **tau, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZgeqrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, double _Complex **a, int64_t *lda, double _Complex **tau, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSorgqr_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, int64_t *k, float **a, int64_t *lda, float **tau, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size); int onemklDorgqr_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, int64_t *k, double **a, int64_t *lda, double **tau, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size); int onemklCungqr_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, int64_t *k, float _Complex **a, int64_t *lda, float _Complex **tau, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZungqr_batch(syclQueue_t device_queue, int64_t *m, int64_t *n, int64_t *k, double _Complex **a, int64_t *lda, double _Complex **tau, int64_t group_count, int64_t 
*group_sizes, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSormqr_batch(syclQueue_t device_queue, onemklSide *side, onemklTranspose *trans, int64_t *m, int64_t *n, int64_t *k, float **a, int64_t *lda, float **tau, float **c, int64_t *ldc, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size); int onemklDormqr_batch(syclQueue_t device_queue, onemklSide *side, onemklTranspose *trans, int64_t *m, int64_t *n, int64_t *k, double **a, int64_t *lda, double **tau, double **c, int64_t *ldc, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size); int onemklCunmqr_batch(syclQueue_t device_queue, onemklSide *side, onemklTranspose *trans, int64_t *m, int64_t *n, int64_t *k, float _Complex **a, int64_t *lda, float _Complex **tau, float _Complex **c, int64_t *ldc, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZunmqr_batch(syclQueue_t device_queue, onemklSide *side, onemklTranspose *trans, int64_t *m, int64_t *n, int64_t *k, double _Complex **a, int64_t *lda, double _Complex **tau, double _Complex **c, int64_t *ldc, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size); int onemklStrtrs_batch(syclQueue_t device_queue, onemklUplo *uplo, onemklTranspose *trans, onemklDiag *diag, int64_t *n, int64_t *nrhs, float **a, int64_t *lda, float **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size); int onemklDtrtrs_batch(syclQueue_t device_queue, onemklUplo *uplo, onemklTranspose *trans, onemklDiag *diag, int64_t *n, int64_t *nrhs, double **a, int64_t *lda, double **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size); int onemklCtrtrs_batch(syclQueue_t device_queue, onemklUplo *uplo, onemklTranspose *trans, onemklDiag *diag, int64_t *n, int64_t *nrhs, float _Complex **a, int64_t *lda, float _Complex 
/* Grouped-batch triangular solves (trtrs_batch) and least squares (gels_batch),
 * plus the start of the strided-batch Cholesky (potrf_batch_strided) wrappers.
 * Fix: onemklCgels_batch was missing the space after "int64_t *lda," —
 * whitespace-only change, no semantic effect. */
**b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZtrtrs_batch(syclQueue_t device_queue, onemklUplo *uplo, onemklTranspose *trans, onemklDiag *diag, int64_t *n, int64_t *nrhs, double _Complex **a, int64_t *lda, double _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSgels_batch(syclQueue_t device_queue, onemklTranspose *trans, int64_t *m, int64_t *n, int64_t *nrhs, float **a, int64_t *lda, float **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, float *scratchpad, int64_t scratchpad_size); int onemklDgels_batch(syclQueue_t device_queue, onemklTranspose *trans, int64_t *m, int64_t *n, int64_t *nrhs, double **a, int64_t *lda, double **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, double *scratchpad, int64_t scratchpad_size); int onemklCgels_batch(syclQueue_t device_queue, onemklTranspose *trans, int64_t *m, int64_t *n, int64_t *nrhs, float _Complex **a, int64_t *lda, float _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZgels_batch(syclQueue_t device_queue, onemklTranspose *trans, int64_t *m, int64_t *n, int64_t *nrhs, double _Complex **a, int64_t *lda, double _Complex **b, int64_t *ldb, int64_t group_count, int64_t *group_sizes, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSpotrf_batch_strided(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float *a, int64_t lda, int64_t stride_a, int64_t batch_size, float *scratchpad, int64_t scratchpad_size); int onemklDpotrf_batch_strided(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double *a, int64_t lda, int64_t stride_a, int64_t batch_size, double *scratchpad, int64_t scratchpad_size); int onemklCpotrf_batch_strided(syclQueue_t device_queue, onemklUplo uplo, int64_t n, float _Complex *a, int64_t lda, int64_t stride_a, 
/* Strided-batch wrappers: potrs, geqrf, orgqr/ungqr, and getri. Matrices live
 * batch_size times in one buffer, stride_a/stride_b/stride_tau/stride_ipiv
 * elements apart; each routine takes the shared scratchpad/scratchpad_size pair. */
int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZpotrf_batch_strided(syclQueue_t device_queue, onemklUplo uplo, int64_t n, double _Complex *a, int64_t lda, int64_t stride_a, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSpotrs_batch_strided(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, int64_t batch_size, float *scratchpad, int64_t scratchpad_size); int onemklDpotrs_batch_strided(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, int64_t batch_size, double *scratchpad, int64_t scratchpad_size); int onemklCpotrs_batch_strided(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZpotrs_batch_strided(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSgeqrf_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, float *a, int64_t lda, int64_t stride_a, float *tau, int64_t stride_tau, int64_t batch_size, float *scratchpad, int64_t scratchpad_size); int onemklDgeqrf_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, double *a, int64_t lda, int64_t stride_a, double *tau, int64_t stride_tau, int64_t batch_size, double *scratchpad, int64_t scratchpad_size); int onemklCgeqrf_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *tau, int64_t stride_tau, int64_t batch_size, float _Complex *scratchpad, int64_t 
scratchpad_size); int onemklZgeqrf_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *tau, int64_t stride_tau, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSorgqr_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, float *a, int64_t lda, int64_t stride_a, float *tau, int64_t stride_tau, int64_t batch_size, float *scratchpad, int64_t scratchpad_size); int onemklDorgqr_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, double *a, int64_t lda, int64_t stride_a, double *tau, int64_t stride_tau, int64_t batch_size, double *scratchpad, int64_t scratchpad_size); int onemklCungqr_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *tau, int64_t stride_tau, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZungqr_batch_strided(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *tau, int64_t stride_tau, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSgetri_batch_strided(syclQueue_t device_queue, int64_t n, float *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, float *scratchpad, int64_t scratchpad_size); int onemklDgetri_batch_strided(syclQueue_t device_queue, int64_t n, double *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, double *scratchpad, int64_t scratchpad_size); int onemklCgetri_batch_strided(syclQueue_t device_queue, int64_t n, float _Complex *a, int64_t lda, int64_t stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZgetri_batch_strided(syclQueue_t device_queue, int64_t n, double _Complex *a, int64_t lda, int64_t 
/* Strided-batch least squares (gels_batch_strided) and the first of the
 * scratchpad-size query declarations. Fix: the gels_batch_strided prototypes
 * named their array parameters "_a"/"_b", unlike "a"/"b" everywhere else in
 * this header; normalized for consistency. Prototype parameter names are
 * documentation only — no ABI or caller impact. */
stride_a, int64_t *ipiv, int64_t stride_ipiv, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size); int onemklSgels_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, int64_t batch_size, float *scratchpad, int64_t scratchpad_size); int onemklDgels_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, int64_t batch_size, double *scratchpad, int64_t scratchpad_size); int onemklCgels_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, float _Complex *a, int64_t lda, int64_t stride_a, float _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, float _Complex *scratchpad, int64_t scratchpad_size); int onemklZgels_batch_strided(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, double _Complex *a, int64_t lda, int64_t stride_a, double _Complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, double _Complex *scratchpad, int64_t scratchpad_size); int64_t onemklSgebrd_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); int64_t onemklDgebrd_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); int64_t onemklCgebrd_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); int64_t onemklZgebrd_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); int64_t onemklSgels_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb); int64_t onemklDgels_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb); int64_t onemklCgels_scratchpad_size(syclQueue_t device_queue, 
/* Scratchpad-size queries: each returns the required scratchpad length for the
 * corresponding factorization/solver above, given only the problem dimensions.
 * NOTE(review): presumably the returned count is in elements of the routine's
 * working type, per oneMKL convention — confirm against the wrapper .cpp. */
onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb); int64_t onemklZgels_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb); int64_t onemklSgeqrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); int64_t onemklDgeqrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); int64_t onemklCgeqrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); int64_t onemklZgeqrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); int64_t onemklSgerqf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); int64_t onemklDgerqf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); int64_t onemklCgerqf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); int64_t onemklZgerqf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); int64_t onemklSgesv_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb); int64_t onemklDgesv_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb); int64_t onemklCgesv_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb); int64_t onemklZgesv_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb); int64_t onemklSgesvd_scratchpad_size(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, int64_t n, int64_t lda, int64_t ldu, int64_t ldvt); int64_t onemklDgesvd_scratchpad_size(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, int64_t n, int64_t lda, int64_t ldu, int64_t ldvt); int64_t onemklCgesvd_scratchpad_size(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, int64_t n, int64_t lda, int64_t ldu, int64_t ldvt); int64_t 
onemklZgesvd_scratchpad_size(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobsvd jobvt, int64_t m, int64_t n, int64_t lda, int64_t ldu, int64_t ldvt); int64_t onemklSgetrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); int64_t onemklDgetrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); int64_t onemklCgetrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); int64_t onemklZgetrf_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); int64_t onemklSgetrfnp_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); int64_t onemklDgetrfnp_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); int64_t onemklCgetrfnp_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); int64_t onemklZgetrfnp_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda); int64_t onemklSgetri_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda); int64_t onemklDgetri_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda); int64_t onemklCgetri_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda); int64_t onemklZgetri_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda); int64_t onemklSgetrs_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb); int64_t onemklDgetrs_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb); int64_t onemklCgetrs_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb); int64_t onemklZgetrs_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb); int64_t onemklCheev_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, int64_t lda); int64_t 
onemklZheev_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklCheevd_scratchpad_size(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklZheevd_scratchpad_size(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklCheevx_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, int64_t lda, float *vl, float *vu, int64_t il, int64_t iu, float *abstol, int64_t ldz); int64_t onemklZheevx_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, int64_t lda, double *vl, double *vu, int64_t il, int64_t iu, double *abstol, int64_t ldz); int64_t onemklChegvd_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb); int64_t onemklZhegvd_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb); int64_t onemklChegvx_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb, float *vl, float *vu, int64_t il, int64_t iu, float *abstol, int64_t ldz); int64_t onemklZhegvx_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb, double *vl, double *vu, int64_t il, int64_t iu, double *abstol, int64_t ldz); int64_t onemklChetrd_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklZhetrd_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklChetrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklZhetrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t 
lda); int64_t onemklSorgbr_scratchpad_size(syclQueue_t device_queue, onemklGenerate vect, int64_t m, int64_t n, int64_t k, int64_t lda); int64_t onemklDorgbr_scratchpad_size(syclQueue_t device_queue, onemklGenerate vect, int64_t m, int64_t n, int64_t k, int64_t lda); int64_t onemklSorgqr_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda); int64_t onemklDorgqr_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda); int64_t onemklSorgtr_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklDorgtr_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklSormqr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc); int64_t onemklDormqr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc); int64_t onemklSormrq_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc); int64_t onemklDormrq_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc); int64_t onemklSormtr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, int64_t lda, int64_t ldc); int64_t onemklDormtr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, int64_t lda, int64_t ldc); int64_t onemklSpotrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklDpotrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklCpotrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t 
lda); int64_t onemklZpotrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklSpotri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklDpotri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklCpotri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklZpotri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklSpotrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb); int64_t onemklDpotrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb); int64_t onemklCpotrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb); int64_t onemklZpotrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb); int64_t onemklSsteqr_scratchpad_size(syclQueue_t device_queue, onemklCompz compz, int64_t n, int64_t ldz); int64_t onemklDsteqr_scratchpad_size(syclQueue_t device_queue, onemklCompz compz, int64_t n, int64_t ldz); int64_t onemklCsteqr_scratchpad_size(syclQueue_t device_queue, onemklCompz compz, int64_t n, int64_t ldz); int64_t onemklZsteqr_scratchpad_size(syclQueue_t device_queue, onemklCompz compz, int64_t n, int64_t ldz); int64_t onemklSsyev_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklDsyev_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklSsyevd_scratchpad_size(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklDsyevd_scratchpad_size(syclQueue_t device_queue, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda); int64_t 
onemklSsyevx_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, int64_t lda, float *vl, float *vu, int64_t il, int64_t iu, float *abstol, int64_t ldz); int64_t onemklDsyevx_scratchpad_size(syclQueue_t device_queue, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, int64_t lda, double *vl, double *vu, int64_t il, int64_t iu, double *abstol, int64_t ldz); int64_t onemklSsygvd_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb); int64_t onemklDsygvd_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklJob jobz, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb); int64_t onemklSsygvx_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb, float *vl, float *vu, int64_t il, int64_t iu, float *abstol, int64_t ldz); int64_t onemklDsygvx_scratchpad_size(syclQueue_t device_queue, int64_t itype, onemklCompz jobz, onemklRangev range, onemklUplo uplo, int64_t n, int64_t lda, int64_t ldb, double *vl, double *vu, int64_t il, int64_t iu, double *abstol, int64_t ldz); int64_t onemklSsytrd_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklDsytrd_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklSsytrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklDsytrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklCsytrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklZsytrf_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklStrtri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklDiag diag, int64_t n, int64_t lda); int64_t 
onemklDtrtri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklDiag diag, int64_t n, int64_t lda); int64_t onemklCtrtri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklDiag diag, int64_t n, int64_t lda); int64_t onemklZtrtri_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklDiag diag, int64_t n, int64_t lda); int64_t onemklStrtrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb); int64_t onemklDtrtrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb); int64_t onemklCtrtrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb); int64_t onemklZtrtrs_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, onemklTranspose trans, onemklDiag diag, int64_t n, int64_t nrhs, int64_t lda, int64_t ldb); int64_t onemklCungbr_scratchpad_size(syclQueue_t device_queue, onemklGenerate vect, int64_t m, int64_t n, int64_t k, int64_t lda); int64_t onemklZungbr_scratchpad_size(syclQueue_t device_queue, onemklGenerate vect, int64_t m, int64_t n, int64_t k, int64_t lda); int64_t onemklCungqr_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda); int64_t onemklZungqr_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda); int64_t onemklCungtr_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklZungtr_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda); int64_t onemklCunmqr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc); int64_t onemklZunmqr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose 
// --- oneMKL LAPACK scratchpad-size queries (batched) ----------------------
// Two batch flavors are declared below, per the visible signatures:
//  * group API: per-group parameter ARRAYS (int64_t*, onemklTranspose*, ...)
//    plus a group_count and a group_sizes array (*_batch_scratchpad_size);
//  * strided API: one scalar parameter set plus stride_a/stride_tau/
//    stride_ipiv/stride_b/... and a batch_size
//    (*_batch_strided_scratchpad_size).
// All return int64_t. (This comment interrupts a declaration wrapped from
// the previous line; C treats comments as whitespace, so it is harmless.)
trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc); int64_t onemklCunmrq_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc); int64_t onemklZunmrq_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklTranspose trans, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t ldc); int64_t onemklCunmtr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, int64_t lda, int64_t ldc); int64_t onemklZunmtr_scratchpad_size(syclQueue_t device_queue, onemklSide side, onemklUplo uplo, onemklTranspose trans, int64_t m, int64_t n, int64_t lda, int64_t ldc); int64_t onemklSgeinv_batch_scratchpad_size(syclQueue_t device_queue, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklDgeinv_batch_scratchpad_size(syclQueue_t device_queue, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklCgeinv_batch_scratchpad_size(syclQueue_t device_queue, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklZgeinv_batch_scratchpad_size(syclQueue_t device_queue, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklSgels_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose* trans, int64_t* m, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes); int64_t onemklDgels_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose* trans, int64_t* m, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes); int64_t onemklCgels_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose* trans, int64_t* m, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes); int64_t onemklZgels_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose* trans, int64_t* m, 
int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes); int64_t onemklSgels_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size); int64_t onemklDgels_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size); int64_t onemklCgels_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size); int64_t onemklZgels_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size); int64_t onemklSgeqrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklDgeqrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklCgeqrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklZgeqrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklSgeqrf_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size); int64_t onemklDgeqrf_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size); int64_t onemklCgeqrf_batch_strided_scratchpad_size(syclQueue_t device_queue, 
int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size); int64_t onemklZgeqrf_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size); int64_t onemklSgesvda_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_s, int64_t ldu, int64_t stride_u, int64_t ldvt, int64_t stride_vt, int64_t batch_size); int64_t onemklDgesvda_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_s, int64_t ldu, int64_t stride_u, int64_t ldvt, int64_t stride_vt, int64_t batch_size); int64_t onemklCgesvda_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_s, int64_t ldu, int64_t stride_u, int64_t ldvt, int64_t stride_vt, int64_t batch_size); int64_t onemklZgesvda_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_s, int64_t ldu, int64_t stride_u, int64_t ldvt, int64_t stride_vt, int64_t batch_size); int64_t onemklSgetrf_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t batch_size); int64_t onemklDgetrf_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t batch_size); int64_t onemklCgetrf_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t batch_size); int64_t onemklZgetrf_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t batch_size); int64_t onemklSgetrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, 
int64_t group_count, int64_t* group_sizes); int64_t onemklDgetrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklCgetrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklZgetrf_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklSgetrfnp_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size); int64_t onemklDgetrfnp_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size); int64_t onemklCgetrfnp_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size); int64_t onemklZgetrfnp_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size); int64_t onemklSgetrfnp_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklDgetrfnp_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklCgetrfnp_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklZgetrfnp_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklSgetri_batch_scratchpad_size(syclQueue_t device_queue, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklDgetri_batch_scratchpad_size(syclQueue_t device_queue, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); 
int64_t onemklCgetri_batch_scratchpad_size(syclQueue_t device_queue, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklZgetri_batch_scratchpad_size(syclQueue_t device_queue, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklSgetri_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t batch_size); int64_t onemklDgetri_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t batch_size); int64_t onemklCgetri_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t batch_size); int64_t onemklZgetri_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t n, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t batch_size); int64_t onemklSgetrs_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose* trans, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes); int64_t onemklDgetrs_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose* trans, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes); int64_t onemklCgetrs_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose* trans, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes); int64_t onemklZgetrs_batch_scratchpad_size(syclQueue_t device_queue, onemklTranspose* trans, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes); int64_t onemklSgetrs_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t ldb, int64_t stride_b, int64_t batch_size); int64_t onemklDgetrs_batch_strided_scratchpad_size(syclQueue_t device_queue, 
onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t ldb, int64_t stride_b, int64_t batch_size); int64_t onemklCgetrs_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t ldb, int64_t stride_b, int64_t batch_size); int64_t onemklZgetrs_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t stride_ipiv, int64_t ldb, int64_t stride_b, int64_t batch_size); int64_t onemklSgetrsnp_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size); int64_t onemklDgetrsnp_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size); int64_t onemklCgetrsnp_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size); int64_t onemklZgetrsnp_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklTranspose trans, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size); int64_t onemklSorgqr_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* k, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklDorgqr_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* k, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklSorgqr_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size); int64_t 
onemklDorgqr_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size); int64_t onemklSormqr_batch_scratchpad_size(syclQueue_t device_queue, onemklSide* side, onemklTranspose* trans, int64_t* m, int64_t* n, int64_t* k, int64_t* lda, int64_t* ldc, int64_t group_count, int64_t* group_sizes); int64_t onemklDormqr_batch_scratchpad_size(syclQueue_t device_queue, onemklSide* side, onemklTranspose* trans, int64_t* m, int64_t* n, int64_t* k, int64_t* lda, int64_t* ldc, int64_t group_count, int64_t* group_sizes); int64_t onemklSpotrf_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklDpotrf_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklCpotrf_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklZpotrf_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, int64_t* n, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklSpotrf_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size); int64_t onemklDpotrf_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size); int64_t onemklCpotrf_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size); int64_t onemklZpotrf_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t lda, int64_t stride_a, int64_t batch_size); int64_t onemklSpotrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, int64_t* n, int64_t* nrhs, int64_t* lda, 
int64_t* ldb, int64_t group_count, int64_t* group_sizes); int64_t onemklDpotrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes); int64_t onemklCpotrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes); int64_t onemklZpotrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes); int64_t onemklSpotrs_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size); int64_t onemklDpotrs_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size); int64_t onemklCpotrs_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size); int64_t onemklZpotrs_batch_strided_scratchpad_size(syclQueue_t device_queue, onemklUplo uplo, int64_t n, int64_t nrhs, int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size); int64_t onemklStrtrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, onemklTranspose* trans, onemklDiag* diag, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes); int64_t onemklDtrtrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, onemklTranspose* trans, onemklDiag* diag, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes); int64_t onemklCtrtrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, onemklTranspose* trans, 
// --- Remaining batched queries, then oneMKL SPARSE wrappers ---------------
// The first declarations below finish the batched trtrs/ungqr/unmqr
// scratchpad queries. The "// SPARSE" marker then introduces wrappers over
// oneMKL sparse BLAS returning int status codes: matrix-handle init/release,
// CSR and COO data binding (plain = int32_t indices, *_64 = int64_t
// indices), descriptor management (matmat/omatconvert/omatadd), optimize_*
// hints, and the gemv/gemvdot/symv/trmv/trsv/gemm/trsm execution entry
// points. NOTE(review): declarations are hard-wrapped across physical
// lines; this comment interrupts one, which is legal C (comments are
// whitespace).
onemklDiag* diag, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes); int64_t onemklZtrtrs_batch_scratchpad_size(syclQueue_t device_queue, onemklUplo* uplo, onemklTranspose* trans, onemklDiag* diag, int64_t* n, int64_t* nrhs, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* group_sizes); int64_t onemklCungqr_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* k, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklZungqr_batch_scratchpad_size(syclQueue_t device_queue, int64_t* m, int64_t* n, int64_t* k, int64_t* lda, int64_t group_count, int64_t* group_sizes); int64_t onemklCungqr_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size); int64_t onemklZungqr_batch_strided_scratchpad_size(syclQueue_t device_queue, int64_t m, int64_t n, int64_t k, int64_t lda, int64_t stride_a, int64_t stride_tau, int64_t batch_size); int64_t onemklCunmqr_batch_scratchpad_size(syclQueue_t device_queue, onemklSide* side, onemklTranspose* trans, int64_t* m, int64_t* n, int64_t* k, int64_t* lda, int64_t* ldc, int64_t group_count, int64_t* group_sizes); int64_t onemklZunmqr_batch_scratchpad_size(syclQueue_t device_queue, onemklSide* side, onemklTranspose* trans, int64_t* m, int64_t* n, int64_t* k, int64_t* lda, int64_t* ldc, int64_t group_count, int64_t* group_sizes); // SPARSE int onemklXsparse_init_matrix_handle(matrix_handle_t *p_spMat); int onemklXsparse_release_matrix_handle(syclQueue_t device_queue, matrix_handle_t *p_spMat); int onemklSsparse_set_csr_data(syclQueue_t device_queue, matrix_handle_t spMat, int32_t nrows, int32_t ncols, onemklIndex index, int32_t *row_ptr, int32_t *col_ind, float *values); int onemklSsparse_set_csr_data_64(syclQueue_t device_queue, matrix_handle_t spMat, int64_t nrows, int64_t ncols, onemklIndex index, int64_t *row_ptr, int64_t *col_ind, float 
*values); int onemklDsparse_set_csr_data(syclQueue_t device_queue, matrix_handle_t spMat, int32_t nrows, int32_t ncols, onemklIndex index, int32_t *row_ptr, int32_t *col_ind, double *values); int onemklDsparse_set_csr_data_64(syclQueue_t device_queue, matrix_handle_t spMat, int64_t nrows, int64_t ncols, onemklIndex index, int64_t *row_ptr, int64_t *col_ind, double *values); int onemklCsparse_set_csr_data(syclQueue_t device_queue, matrix_handle_t spMat, int32_t nrows, int32_t ncols, onemklIndex index, int32_t *row_ptr, int32_t *col_ind, float _Complex *values); int onemklCsparse_set_csr_data_64(syclQueue_t device_queue, matrix_handle_t spMat, int64_t nrows, int64_t ncols, onemklIndex index, int64_t *row_ptr, int64_t *col_ind, float _Complex *values); int onemklZsparse_set_csr_data(syclQueue_t device_queue, matrix_handle_t spMat, int32_t nrows, int32_t ncols, onemklIndex index, int32_t *row_ptr, int32_t *col_ind, double _Complex *values); int onemklZsparse_set_csr_data_64(syclQueue_t device_queue, matrix_handle_t spMat, int64_t nrows, int64_t ncols, onemklIndex index, int64_t *row_ptr, int64_t *col_ind, double _Complex *values); int onemklSsparse_set_coo_data(syclQueue_t device_queue, matrix_handle_t spMat, int32_t nrows, int32_t ncols, int32_t nnz, onemklIndex index, int32_t *row_ind, int32_t *col_ind, float *values); int onemklSsparse_set_coo_data_64(syclQueue_t device_queue, matrix_handle_t spMat, int64_t nrows, int64_t ncols, int64_t nnz, onemklIndex index, int64_t *row_ind, int64_t *col_ind, float *values); int onemklDsparse_set_coo_data(syclQueue_t device_queue, matrix_handle_t spMat, int32_t nrows, int32_t ncols, int32_t nnz, onemklIndex index, int32_t *row_ind, int32_t *col_ind, double *values); int onemklDsparse_set_coo_data_64(syclQueue_t device_queue, matrix_handle_t spMat, int64_t nrows, int64_t ncols, int64_t nnz, onemklIndex index, int64_t *row_ind, int64_t *col_ind, double *values); int onemklCsparse_set_coo_data(syclQueue_t device_queue, 
matrix_handle_t spMat, int32_t nrows, int32_t ncols, int32_t nnz, onemklIndex index, int32_t *row_ind, int32_t *col_ind, float _Complex *values); int onemklCsparse_set_coo_data_64(syclQueue_t device_queue, matrix_handle_t spMat, int64_t nrows, int64_t ncols, int64_t nnz, onemklIndex index, int64_t *row_ind, int64_t *col_ind, float _Complex *values); int onemklZsparse_set_coo_data(syclQueue_t device_queue, matrix_handle_t spMat, int32_t nrows, int32_t ncols, int32_t nnz, onemklIndex index, int32_t *row_ind, int32_t *col_ind, double _Complex *values); int onemklZsparse_set_coo_data_64(syclQueue_t device_queue, matrix_handle_t spMat, int64_t nrows, int64_t ncols, int64_t nnz, onemklIndex index, int64_t *row_ind, int64_t *col_ind, double _Complex *values); int onemklXsparse_init_matmat_descr(matmat_descr_t *p_desc); int onemklXsparse_release_matmat_descr(matmat_descr_t *p_desc); int onemklXsparse_init_omatconvert_descr(syclQueue_t device_queue, omatconvert_descr_t *p_descr); int onemklXsparse_release_omatconvert_descr(syclQueue_t device_queue, omatconvert_descr_t descr); int onemklXsparse_init_omatadd_descr(syclQueue_t device_queue, omatadd_descr_t *p_omatadd_desc); int onemklXsparse_release_omatadd_descr(syclQueue_t device_queue, omatadd_descr_t omatadd_desc); int onemklXsparse_omatcopy(syclQueue_t device_queue, onemklTranspose transpose_val, matrix_handle_t spMat_in, matrix_handle_t spMat_out); int onemklXsparse_sort_matrix(syclQueue_t device_queue, matrix_handle_t spMat); int onemklSsparse_update_diagonal_values(syclQueue_t device_queue, matrix_handle_t spMat, int64_t length, float *new_diag_values); int onemklDsparse_update_diagonal_values(syclQueue_t device_queue, matrix_handle_t spMat, int64_t length, double *new_diag_values); int onemklCsparse_update_diagonal_values(syclQueue_t device_queue, matrix_handle_t spMat, int64_t length, float _Complex *new_diag_values); int onemklZsparse_update_diagonal_values(syclQueue_t device_queue, matrix_handle_t spMat, int64_t 
length, double _Complex *new_diag_values); int onemklXsparse_optimize_gemv(syclQueue_t device_queue, onemklTranspose opA, matrix_handle_t A); int onemklXsparse_optimize_trmv(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, matrix_handle_t A); int onemklXsparse_optimize_trsv(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, matrix_handle_t A); int onemklXsparse_optimize_gemm(syclQueue_t device_queue, onemklTranspose opA, matrix_handle_t A); int onemklXsparse_optimize_gemm_advanced(syclQueue_t device_queue, onemklLayout layout_val, onemklTranspose opA, onemklTranspose opB, matrix_handle_t A, int64_t columns); int onemklXsparse_optimize_trsm(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, matrix_handle_t A); int onemklXsparse_optimize_trsm_advanced(syclQueue_t device_queue, onemklLayout layout_val, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, matrix_handle_t A, int64_t columns); int onemklSsparse_gemv(syclQueue_t device_queue, onemklTranspose opA, float *alpha, matrix_handle_t A, float *x, float *beta, float *y); int onemklDsparse_gemv(syclQueue_t device_queue, onemklTranspose opA, double *alpha, matrix_handle_t A, double *x, double *beta, double *y); int onemklCsparse_gemv(syclQueue_t device_queue, onemklTranspose opA, float _Complex *alpha, matrix_handle_t A, float _Complex *x, float _Complex *beta, float _Complex *y); int onemklZsparse_gemv(syclQueue_t device_queue, onemklTranspose opA, double _Complex *alpha, matrix_handle_t A, double _Complex *x, double _Complex *beta, double _Complex *y); int onemklSsparse_gemvdot(syclQueue_t device_queue, onemklTranspose opA, float *alpha, matrix_handle_t A, float *x, float *beta, float *y, float *d); int onemklDsparse_gemvdot(syclQueue_t device_queue, onemklTranspose opA, double *alpha, matrix_handle_t A, double *x, double *beta, double *y, double *d); int onemklCsparse_gemvdot(syclQueue_t 
device_queue, onemklTranspose opA, float _Complex *alpha, matrix_handle_t A, float _Complex *x, float _Complex *beta, float _Complex *y, float _Complex *d); int onemklZsparse_gemvdot(syclQueue_t device_queue, onemklTranspose opA, double _Complex *alpha, matrix_handle_t A, double _Complex *x, double _Complex *beta, double _Complex *y, double _Complex *d); int onemklSsparse_symv(syclQueue_t device_queue, onemklUplo uplo_val, float *alpha, matrix_handle_t A, float *x, float *beta, float *y); int onemklDsparse_symv(syclQueue_t device_queue, onemklUplo uplo_val, double *alpha, matrix_handle_t A, double *x, double *beta, double *y); int onemklCsparse_symv(syclQueue_t device_queue, onemklUplo uplo_val, float _Complex *alpha, matrix_handle_t A, float _Complex *x, float _Complex *beta, float _Complex *y); int onemklZsparse_symv(syclQueue_t device_queue, onemklUplo uplo_val, double _Complex *alpha, matrix_handle_t A, double _Complex *x, double _Complex *beta, double _Complex *y); int onemklSsparse_trmv(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, float *alpha, matrix_handle_t A, float *x, float *beta, float *y); int onemklDsparse_trmv(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, double *alpha, matrix_handle_t A, double *x, double *beta, double *y); int onemklCsparse_trmv(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, float _Complex *alpha, matrix_handle_t A, float _Complex *x, float _Complex *beta, float _Complex *y); int onemklZsparse_trmv(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, double _Complex *alpha, matrix_handle_t A, double _Complex *x, double _Complex *beta, double _Complex *y); int onemklSsparse_trsv(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, float *alpha, matrix_handle_t A, float *x, float *y); int onemklDsparse_trsv(syclQueue_t device_queue, 
onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, double *alpha, matrix_handle_t A, double *x, double *y); int onemklCsparse_trsv(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, float _Complex *alpha, matrix_handle_t A, float _Complex *x, float _Complex *y); int onemklZsparse_trsv(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, double _Complex *alpha, matrix_handle_t A, double _Complex *x, double _Complex *y); int onemklSsparse_gemm(syclQueue_t device_queue, onemklLayout layout_val, onemklTranspose opA, onemklTranspose opX, float *alpha, matrix_handle_t A, float *X, int64_t columns, int64_t ldx, float *beta, float *Y, int64_t ldy); int onemklDsparse_gemm(syclQueue_t device_queue, onemklLayout layout_val, onemklTranspose opA, onemklTranspose opX, double *alpha, matrix_handle_t A, double *X, int64_t columns, int64_t ldx, double *beta, double *Y, int64_t ldy); int onemklCsparse_gemm(syclQueue_t device_queue, onemklLayout layout_val, onemklTranspose opA, onemklTranspose opX, float _Complex *alpha, matrix_handle_t A, float _Complex *X, int64_t columns, int64_t ldx, float _Complex *beta, float _Complex *Y, int64_t ldy); int onemklZsparse_gemm(syclQueue_t device_queue, onemklLayout layout_val, onemklTranspose opA, onemklTranspose opX, double _Complex *alpha, matrix_handle_t A, double _Complex *X, int64_t columns, int64_t ldx, double _Complex *beta, double _Complex *Y, int64_t ldy); int onemklSsparse_trsm(syclQueue_t device_queue, onemklLayout layout_val, onemklTranspose opA, onemklTranspose opX, onemklUplo uplo_val, onemklDiag diag_val, float *alpha, matrix_handle_t A, float *X, int64_t columns, int64_t ldx, float *Y, int64_t ldy); int onemklDsparse_trsm(syclQueue_t device_queue, onemklLayout layout_val, onemklTranspose opA, onemklTranspose opX, onemklUplo uplo_val, onemklDiag diag_val, double *alpha, matrix_handle_t A, double *X, int64_t columns, int64_t ldx, double *Y, int64_t 
ldy); int onemklCsparse_trsm(syclQueue_t device_queue, onemklLayout layout_val, onemklTranspose opA, onemklTranspose opX, onemklUplo uplo_val, onemklDiag diag_val, float _Complex *alpha, matrix_handle_t A, float _Complex *X, int64_t columns, int64_t ldx, float _Complex *Y, int64_t ldy); int onemklZsparse_trsm(syclQueue_t device_queue, onemklLayout layout_val, onemklTranspose opA, onemklTranspose opX, onemklUplo uplo_val, onemklDiag diag_val, double _Complex *alpha, matrix_handle_t A, double _Complex *X, int64_t columns, int64_t ldx, double _Complex *Y, int64_t ldy); int onemklXsparse_set_matmat_data(matmat_descr_t descr, onemklMatrixView viewA, onemklTranspose opA, onemklMatrixView viewB, onemklTranspose opB, onemklMatrixView viewC); int onemklSsparse_matmatd(syclQueue_t device_queue, onemklLayout c_layout, onemklTranspose opA, onemklTranspose opB, float *alpha, matrix_handle_t A, matrix_handle_t B, float *beta, float *C, int64_t c_nrows, int64_t c_ncols, int64_t ldc); int onemklDsparse_matmatd(syclQueue_t device_queue, onemklLayout c_layout, onemklTranspose opA, onemklTranspose opB, double *alpha, matrix_handle_t A, matrix_handle_t B, double *beta, double *C, int64_t c_nrows, int64_t c_ncols, int64_t ldc); int onemklCsparse_matmatd(syclQueue_t device_queue, onemklLayout c_layout, onemklTranspose opA, onemklTranspose opB, float _Complex *alpha, matrix_handle_t A, matrix_handle_t B, float _Complex *beta, float _Complex *C, int64_t c_nrows, int64_t c_ncols, int64_t ldc); int onemklZsparse_matmatd(syclQueue_t device_queue, onemklLayout c_layout, onemklTranspose opA, onemklTranspose opB, double _Complex *alpha, matrix_handle_t A, matrix_handle_t B, double _Complex *beta, double _Complex *C, int64_t c_nrows, int64_t c_ncols, int64_t ldc); int onemklXsparse_matmat(syclQueue_t device_queue, matrix_handle_t A, matrix_handle_t B, matrix_handle_t C, onemklMatmatRequest req, matmat_descr_t descr, int64_t *sizeTempBuffer, void *tempBuffer); int onemklDestroy(void); #ifdef 
__cplusplus } #endif ================================================ FILE: deps/src/onemkl_dft.cpp ================================================ #include "onemkl_dft.h" #include "sycl.hpp" // internal struct definitions #include #include #include #include #include #include using namespace oneapi::mkl::dft; struct onemklDftDescriptor_st { precision prec; domain dom; void *ptr; // pointer to concrete descriptor }; static inline precision to_prec(onemklDftPrecision p) { return (p == ONEMKL_DFT_PRECISION_DOUBLE) ? precision::DOUBLE : precision::SINGLE; } static inline domain to_dom(onemklDftDomain d) { return (d == ONEMKL_DFT_DOMAIN_COMPLEX) ? domain::COMPLEX : domain::REAL; } // Helper to allocate descriptor depending on precision/domain static int allocate_descriptor(onemklDftDescriptor_t *out, precision p, domain d, const std::vector &lengths) { try { auto *desc = new onemklDftDescriptor_st(); desc->prec = p; desc->dom = d; if (p == precision::SINGLE && d == domain::REAL) { desc->ptr = new descriptor(lengths); } else if (p == precision::SINGLE && d == domain::COMPLEX) { desc->ptr = new descriptor(lengths); } else if (p == precision::DOUBLE && d == domain::REAL) { desc->ptr = new descriptor(lengths); } else { // DOUBLE COMPLEX desc->ptr = new descriptor(lengths); } *out = desc; return 0; } catch (...) 
{ return -1; } } int onemklDftCreate1D(onemklDftDescriptor_t *desc, onemklDftPrecision precision, onemklDftDomain domain, int64_t length) { std::vector dims{length}; return allocate_descriptor(desc, to_prec(precision), to_dom(domain), dims); } int onemklDftCreateND(onemklDftDescriptor_t *desc, onemklDftPrecision precision, onemklDftDomain domain, int64_t dim, const int64_t *lengths) { if (dim <= 0 || lengths == nullptr) return -2; std::vector dims(lengths, lengths + dim); return allocate_descriptor(desc, to_prec(precision), to_dom(domain), dims); } int onemklDftDestroy(onemklDftDescriptor_t desc) { if (!desc) return 0; try { if (desc->prec == precision::SINGLE && desc->dom == domain::REAL) { delete static_cast< descriptor* >(desc->ptr); } else if (desc->prec == precision::SINGLE && desc->dom == domain::COMPLEX) { delete static_cast< descriptor* >(desc->ptr); } else if (desc->prec == precision::DOUBLE && desc->dom == domain::REAL) { delete static_cast< descriptor* >(desc->ptr); } else { delete static_cast< descriptor* >(desc->ptr); } delete desc; return 0; } catch (...) { return -1; } } int onemklDftCommit(onemklDftDescriptor_t desc, syclQueue_t queue) { if (!desc || !queue) return -2; try { if (desc->prec == precision::SINGLE && desc->dom == domain::REAL) { static_cast< descriptor* >(desc->ptr)->commit(queue->val); } else if (desc->prec == precision::SINGLE && desc->dom == domain::COMPLEX) { static_cast< descriptor* >(desc->ptr)->commit(queue->val); } else if (desc->prec == precision::DOUBLE && desc->dom == domain::REAL) { static_cast< descriptor* >(desc->ptr)->commit(queue->val); } else { static_cast< descriptor* >(desc->ptr)->commit(queue->val); } return 0; } catch (...) { return -1; } } // Internal mapping helpers. We cannot rely on numeric equality between our // exported onemklDftConfigParam enumeration values (which are compact and // stable for Julia) and oneMKL's internal sparse enum values. Provide an // explicit translation layer. 
static inline config_param to_param(onemklDftConfigParam p) { switch(p) { case ONEMKL_DFT_PARAM_FORWARD_DOMAIN: return config_param::FORWARD_DOMAIN; case ONEMKL_DFT_PARAM_DIMENSION: return config_param::DIMENSION; case ONEMKL_DFT_PARAM_LENGTHS: return config_param::LENGTHS; case ONEMKL_DFT_PARAM_PRECISION: return config_param::PRECISION; case ONEMKL_DFT_PARAM_FORWARD_SCALE: return config_param::FORWARD_SCALE; case ONEMKL_DFT_PARAM_BACKWARD_SCALE: return config_param::BACKWARD_SCALE; case ONEMKL_DFT_PARAM_NUMBER_OF_TRANSFORMS: return config_param::NUMBER_OF_TRANSFORMS; case ONEMKL_DFT_PARAM_COMPLEX_STORAGE: return config_param::COMPLEX_STORAGE; case ONEMKL_DFT_PARAM_PLACEMENT: return config_param::PLACEMENT; case ONEMKL_DFT_PARAM_INPUT_STRIDES: return config_param::INPUT_STRIDES; case ONEMKL_DFT_PARAM_OUTPUT_STRIDES: return config_param::OUTPUT_STRIDES; case ONEMKL_DFT_PARAM_FWD_DISTANCE: return config_param::FWD_DISTANCE; case ONEMKL_DFT_PARAM_BWD_DISTANCE: return config_param::BWD_DISTANCE; case ONEMKL_DFT_PARAM_WORKSPACE: return config_param::WORKSPACE; case ONEMKL_DFT_PARAM_WORKSPACE_ESTIMATE_BYTES: return config_param::WORKSPACE_ESTIMATE_BYTES; case ONEMKL_DFT_PARAM_WORKSPACE_BYTES: return config_param::WORKSPACE_BYTES; case ONEMKL_DFT_PARAM_FWD_STRIDES: return config_param::FWD_STRIDES; case ONEMKL_DFT_PARAM_BWD_STRIDES: return config_param::BWD_STRIDES; case ONEMKL_DFT_PARAM_WORKSPACE_PLACEMENT: return config_param::WORKSPACE_PLACEMENT; case ONEMKL_DFT_PARAM_WORKSPACE_EXTERNAL_BYTES: return config_param::WORKSPACE_EXTERNAL_BYTES; default: return config_param::FORWARD_DOMAIN; // defensive; shouldn't happen } } // Explicit value mapping (avoid relying on underlying enum integral values) static inline config_value to_cvalue(onemklDftConfigValue v) { switch (v) { case ONEMKL_DFT_VALUE_COMMITTED: return config_value::COMMITTED; case ONEMKL_DFT_VALUE_UNCOMMITTED: return config_value::UNCOMMITTED; case ONEMKL_DFT_VALUE_COMPLEX_COMPLEX: return 
config_value::COMPLEX_COMPLEX; case ONEMKL_DFT_VALUE_REAL_REAL: return config_value::REAL_REAL; case ONEMKL_DFT_VALUE_INPLACE: return config_value::INPLACE; case ONEMKL_DFT_VALUE_NOT_INPLACE: return config_value::NOT_INPLACE; case ONEMKL_DFT_VALUE_WORKSPACE_AUTOMATIC: return config_value::WORKSPACE_AUTOMATIC; case ONEMKL_DFT_VALUE_ALLOW: return config_value::ALLOW; case ONEMKL_DFT_VALUE_AVOID: return config_value::AVOID; case ONEMKL_DFT_VALUE_WORKSPACE_INTERNAL: return config_value::WORKSPACE_INTERNAL; case ONEMKL_DFT_VALUE_WORKSPACE_EXTERNAL: return config_value::WORKSPACE_EXTERNAL; default: return config_value::UNCOMMITTED; // defensive fallback } } static inline onemklDftConfigValue from_cvalue(config_value cv) { switch (cv) { case config_value::COMMITTED: return ONEMKL_DFT_VALUE_COMMITTED; case config_value::UNCOMMITTED: return ONEMKL_DFT_VALUE_UNCOMMITTED; case config_value::COMPLEX_COMPLEX: return ONEMKL_DFT_VALUE_COMPLEX_COMPLEX; case config_value::REAL_REAL: return ONEMKL_DFT_VALUE_REAL_REAL; case config_value::INPLACE: return ONEMKL_DFT_VALUE_INPLACE; case config_value::NOT_INPLACE: return ONEMKL_DFT_VALUE_NOT_INPLACE; case config_value::WORKSPACE_AUTOMATIC: return ONEMKL_DFT_VALUE_WORKSPACE_AUTOMATIC; case config_value::ALLOW: return ONEMKL_DFT_VALUE_ALLOW; case config_value::AVOID: return ONEMKL_DFT_VALUE_AVOID; case config_value::WORKSPACE_INTERNAL: return ONEMKL_DFT_VALUE_WORKSPACE_INTERNAL; case config_value::WORKSPACE_EXTERNAL: return ONEMKL_DFT_VALUE_WORKSPACE_EXTERNAL; default: return ONEMKL_DFT_VALUE_UNCOMMITTED; // unknown / unsupported -> safe default } } // Dispatch macro re-used for configuration #define ONEMKL_DFT_DISPATCH_CFG(desc_expr, CALL) \ do { \ if (desc->prec == precision::SINGLE && desc->dom == domain::REAL) { \ auto *d = static_cast< descriptor* >(desc_expr); \ CALL; \ } else if (desc->prec == precision::SINGLE && desc->dom == domain::COMPLEX) { \ auto *d = static_cast< descriptor* >(desc_expr); \ CALL; \ } else if (desc->prec == 
precision::DOUBLE && desc->dom == domain::REAL) { \ auto *d = static_cast< descriptor* >(desc_expr); \ CALL; \ } else { \ auto *d = static_cast< descriptor* >(desc_expr); \ CALL; \ } \ } while (0) int onemklDftSetValueInt64(onemklDftDescriptor_t desc, onemklDftConfigParam param, int64_t value) { if (!desc) return -2; if (!desc->ptr) return -3; try { ONEMKL_DFT_DISPATCH_CFG(desc->ptr, d->set_value(to_param(param), value)); return 0; } catch (...) { return -1; } } int onemklDftSetValueDouble(onemklDftDescriptor_t desc, onemklDftConfigParam param, double value) { if (!desc) return -2; if (!desc->ptr) return -3; try { ONEMKL_DFT_DISPATCH_CFG(desc->ptr, d->set_value(to_param(param), value)); return 0; } catch (...) { return -1; } } int onemklDftSetValueInt64Array(onemklDftDescriptor_t desc, onemklDftConfigParam param, const int64_t *values, int64_t n) { if (!desc || !values || n < 0) return -2; if (!desc->ptr) return -3; try { std::vector v(values, values + n); ONEMKL_DFT_DISPATCH_CFG(desc->ptr, d->set_value(to_param(param), v)); return 0; } catch (...) { return -1; } } int onemklDftSetValueConfigValue(onemklDftDescriptor_t desc, onemklDftConfigParam param, onemklDftConfigValue value) { if (!desc) return -2; if (!desc->ptr) return -3; try { ONEMKL_DFT_DISPATCH_CFG(desc->ptr, d->set_value(to_param(param), to_cvalue(value))); return 0; } catch (...) { return -1; } } int onemklDftGetValueInt64(onemklDftDescriptor_t desc, onemklDftConfigParam param, int64_t *value) { if (!desc || !value) return -2; if (!desc->ptr) return -3; try { ONEMKL_DFT_DISPATCH_CFG(desc->ptr, d->get_value(to_param(param), value)); return 0; } catch (...) { return -1; } } int onemklDftGetValueDouble(onemklDftDescriptor_t desc, onemklDftConfigParam param, double *value) { if (!desc || !value) return -2; if (!desc->ptr) return -3; try { ONEMKL_DFT_DISPATCH_CFG(desc->ptr, d->get_value(to_param(param), value)); return 0; } catch (...) 
{ return -1; } } int onemklDftGetValueInt64Array(onemklDftDescriptor_t desc, onemklDftConfigParam param, int64_t *values, int64_t *n) { if (!desc || !values || !n || *n <= 0) return -2; if (!desc->ptr) return -3; try { std::vector v; ONEMKL_DFT_DISPATCH_CFG(desc->ptr, d->get_value(to_param(param), &v)); int64_t to_copy = (*n < (int64_t)v.size()) ? *n : (int64_t)v.size(); std::memcpy(values, v.data(), sizeof(int64_t)*to_copy); *n = to_copy; return 0; } catch (...) { return -1; } } int onemklDftGetValueConfigValue(onemklDftDescriptor_t desc, onemklDftConfigParam param, onemklDftConfigValue *value) { if (!desc || !value) return -2; if (!desc->ptr) return -3; try { config_value cv; ONEMKL_DFT_DISPATCH_CFG(desc->ptr, d->get_value(to_param(param), &cv)); *value = from_cvalue(cv); return 0; } catch (...) { return -1; } } // Helper macro to dispatch compute operations #define ONEMKL_DFT_DISPATCH(desc_expr, CALL) \ do { \ if (desc->prec == precision::SINGLE && desc->dom == domain::REAL) { \ auto *d = static_cast< descriptor* >(desc_expr); \ CALL; \ } else if (desc->prec == precision::SINGLE && desc->dom == domain::COMPLEX) { \ auto *d = static_cast< descriptor* >(desc_expr); \ CALL; \ } else if (desc->prec == precision::DOUBLE && desc->dom == domain::REAL) { \ auto *d = static_cast< descriptor* >(desc_expr); \ CALL; \ } else { \ auto *d = static_cast< descriptor* >(desc_expr); \ CALL; \ } \ } while (0) // Pointer (USM) dispatch with proper element typing rather than using void* directly. // Using void* caused instantiation of compute_forward/backward with template // parameters on some oneMKL versions, leading to unresolved symbols at runtime. 
// In-place forward transform on USM memory; blocks until completion (.wait()).
int onemklDftComputeForward(onemklDftDescriptor_t desc, void *inout) {
    if (!desc || !inout) return -2;
    if (!desc->ptr) return -3;
    try {
        if (desc->dom == domain::REAL) {
            if (desc->prec == precision::SINGLE) {
                auto *p = static_cast<float *>(inout);
                ONEMKL_DFT_DISPATCH(desc->ptr, compute_forward(*d, p).wait());
            } else {
                auto *p = static_cast<double *>(inout);
                ONEMKL_DFT_DISPATCH(desc->ptr, compute_forward(*d, p).wait());
            }
        } else { // COMPLEX
            if (desc->prec == precision::SINGLE) {
                auto *p = static_cast<std::complex<float> *>(inout);
                ONEMKL_DFT_DISPATCH(desc->ptr, compute_forward(*d, p).wait());
            } else {
                auto *p = static_cast<std::complex<double> *>(inout);
                ONEMKL_DFT_DISPATCH(desc->ptr, compute_forward(*d, p).wait());
            }
        }
        return 0;
    } catch (...) {
        return -1;
    }
}

// Out-of-place forward transform on USM memory; blocks until completion.
int onemklDftComputeForwardOutOfPlace(onemklDftDescriptor_t desc, void *in, void *out) {
    if (!desc || !in || !out) return -2;
    if (!desc->ptr) return -3;
    try {
        if (desc->dom == domain::REAL) {
            if (desc->prec == precision::SINGLE) {
                // Real-domain forward transform: real input -> complex output
                auto *pi = static_cast<float *>(in);
                auto *po = static_cast<std::complex<float> *>(out);
                ONEMKL_DFT_DISPATCH(desc->ptr, compute_forward(*d, pi, po).wait());
            } else {
                auto *pi = static_cast<double *>(in);
                auto *po = static_cast<std::complex<double> *>(out);
                ONEMKL_DFT_DISPATCH(desc->ptr, compute_forward(*d, pi, po).wait());
            }
        } else { // COMPLEX
            if (desc->prec == precision::SINGLE) {
                auto *pi = static_cast<std::complex<float> *>(in);
                auto *po = static_cast<std::complex<float> *>(out);
                ONEMKL_DFT_DISPATCH(desc->ptr, compute_forward(*d, pi, po).wait());
            } else {
                auto *pi = static_cast<std::complex<double> *>(in);
                auto *po = static_cast<std::complex<double> *>(out);
                ONEMKL_DFT_DISPATCH(desc->ptr, compute_forward(*d, pi, po).wait());
            }
        }
        return 0;
    } catch (...) {
        return -1;
    }
}

// In-place backward (inverse) transform on USM memory; blocks until completion.
int onemklDftComputeBackward(onemklDftDescriptor_t desc, void *inout) {
    if (!desc || !inout) return -2;
    if (!desc->ptr) return -3;
    try {
        if (desc->dom == domain::REAL) {
            if (desc->prec == precision::SINGLE) {
                auto *p = static_cast<float *>(inout);
                ONEMKL_DFT_DISPATCH(desc->ptr, compute_backward(*d, p).wait());
            } else {
                auto *p = static_cast<double *>(inout);
                ONEMKL_DFT_DISPATCH(desc->ptr, compute_backward(*d, p).wait());
            }
        } else { // COMPLEX
            if (desc->prec == precision::SINGLE) {
                auto *p = static_cast<std::complex<float> *>(inout);
                ONEMKL_DFT_DISPATCH(desc->ptr, compute_backward(*d, p).wait());
            } else {
                auto *p = static_cast<std::complex<double> *>(inout);
                ONEMKL_DFT_DISPATCH(desc->ptr, compute_backward(*d, p).wait());
            }
        }
        return 0;
    } catch (...) {
        return -1;
    }
}

// Out-of-place backward transform on USM memory; blocks until completion.
int onemklDftComputeBackwardOutOfPlace(onemklDftDescriptor_t desc, void *in, void *out) {
    if (!desc || !in || !out) return -2;
    if (!desc->ptr) return -3;
    try {
        if (desc->dom == domain::REAL) {
            if (desc->prec == precision::SINGLE) {
                // Real-domain backward transform: complex input -> real output
                auto *pi = static_cast<std::complex<float> *>(in);
                auto *po = static_cast<float *>(out);
                ONEMKL_DFT_DISPATCH(desc->ptr, compute_backward(*d, pi, po).wait());
            } else {
                auto *pi = static_cast<std::complex<double> *>(in);
                auto *po = static_cast<double *>(out);
                ONEMKL_DFT_DISPATCH(desc->ptr, compute_backward(*d, pi, po).wait());
            }
        } else { // COMPLEX
            if (desc->prec == precision::SINGLE) {
                auto *pi = static_cast<std::complex<float> *>(in);
                auto *po = static_cast<std::complex<float> *>(out);
                ONEMKL_DFT_DISPATCH(desc->ptr, compute_backward(*d, pi, po).wait());
            } else {
                auto *pi = static_cast<std::complex<double> *>(in);
                auto *po = static_cast<std::complex<double> *>(out);
                ONEMKL_DFT_DISPATCH(desc->ptr, compute_backward(*d, pi, po).wait());
            }
        }
        return 0;
    } catch (...) {
        return -1;
    }
}

// Keep dispatch macros defined for buffer variants below; undef at end of file.
// Buffer API helpers: create temporary buffers referencing host memory.
// NOTE: This assumes the memory is accessible and sized appropriately.
template static inline sycl::buffer make_buffer(T *ptr, int64_t n) { return sycl::buffer(ptr, sycl::range<1>(static_cast(n))); } // Query total element count from LENGTHS config (product of lengths). static int64_t get_element_count(onemklDftDescriptor_t desc) { int64_t n = 0; int64_t dims = 0; if (onemklDftGetValueInt64(desc, ONEMKL_DFT_PARAM_DIMENSION, &dims) != 0) return -1; if (dims <= 0 || dims > 8) return -1; int64_t lens[16]; int64_t want = dims; if (onemklDftGetValueInt64Array(desc, ONEMKL_DFT_PARAM_LENGTHS, lens, &want) != 0) return -1; if (want != dims) return -1; int64_t total = 1; for (int i=0;iptr) return -3; int64_t n = get_element_count(desc); if (n <= 0) return -3; try { if (desc->dom == domain::REAL) { if (desc->prec == precision::SINGLE) { auto buf = make_buffer((float*)inout, n); ONEMKL_DFT_DISPATCH(desc->ptr, compute_forward(*d, buf)); } else { auto buf = make_buffer((double*)inout, n); ONEMKL_DFT_DISPATCH(desc->ptr, compute_forward(*d, buf)); } } else { // COMPLEX if (desc->prec == precision::SINGLE) { auto buf = make_buffer((std::complex*)inout, n); ONEMKL_DFT_DISPATCH(desc->ptr, compute_forward(*d, buf)); } else { auto buf = make_buffer((std::complex*)inout, n); ONEMKL_DFT_DISPATCH(desc->ptr, compute_forward(*d, buf)); } } return 0; } catch (...) 
{ return -1; } } int onemklDftComputeForwardOutOfPlaceBuffer(onemklDftDescriptor_t desc, void *in, void *out) { if (!desc || !in || !out) return -2; if (!desc->ptr) return -3; int64_t n = get_element_count(desc); if (n <= 0) return -3; try { if (desc->dom == domain::REAL) { if (desc->prec == precision::SINGLE) { auto bufi = make_buffer((float*)in, n); /* complex output size may differ; assume caller sized */ auto bufo = make_buffer((std::complex*)out, n); ONEMKL_DFT_DISPATCH(desc->ptr, compute_forward(*d, bufi, bufo)); } else { auto bufi = make_buffer((double*)in, n); auto bufo = make_buffer((std::complex*)out, n); ONEMKL_DFT_DISPATCH(desc->ptr, compute_forward(*d, bufi, bufo)); } } else { if (desc->prec == precision::SINGLE) { auto bufi = make_buffer((std::complex*)in, n); auto bufo = make_buffer((std::complex*)out, n); ONEMKL_DFT_DISPATCH(desc->ptr, compute_forward(*d, bufi, bufo)); } else { auto bufi = make_buffer((std::complex*)in, n); auto bufo = make_buffer((std::complex*)out, n); ONEMKL_DFT_DISPATCH(desc->ptr, compute_forward(*d, bufi, bufo)); } } return 0; } catch (...) { return -1; } } int onemklDftComputeBackwardBuffer(onemklDftDescriptor_t desc, void *inout) { if (!desc || !inout) return -2; if (!desc->ptr) return -3; int64_t n = get_element_count(desc); if (n <= 0) return -3; try { if (desc->dom == domain::REAL) { if (desc->prec == precision::SINGLE) { auto buf = make_buffer((float*)inout, n); ONEMKL_DFT_DISPATCH(desc->ptr, compute_backward(*d, buf)); } else { auto buf = make_buffer((double*)inout, n); ONEMKL_DFT_DISPATCH(desc->ptr, compute_backward(*d, buf)); } } else { if (desc->prec == precision::SINGLE) { auto buf = make_buffer((std::complex*)inout, n); ONEMKL_DFT_DISPATCH(desc->ptr, compute_backward(*d, buf)); } else { auto buf = make_buffer((std::complex*)inout, n); ONEMKL_DFT_DISPATCH(desc->ptr, compute_backward(*d, buf)); } } return 0; } catch (...) 
{ return -1; } } int onemklDftComputeBackwardOutOfPlaceBuffer(onemklDftDescriptor_t desc, void *in, void *out) { if (!desc || !in || !out) return -2; if (!desc->ptr) return -3; int64_t n = get_element_count(desc); if (n <= 0) return -3; try { if (desc->dom == domain::REAL) { if (desc->prec == precision::SINGLE) { auto bufi = make_buffer((std::complex*)in, n); auto bufo = make_buffer((float*)out, n); ONEMKL_DFT_DISPATCH(desc->ptr, compute_backward(*d, bufi, bufo)); } else { auto bufi = make_buffer((std::complex*)in, n); auto bufo = make_buffer((double*)out, n); ONEMKL_DFT_DISPATCH(desc->ptr, compute_backward(*d, bufi, bufo)); } } else { if (desc->prec == precision::SINGLE) { auto bufi = make_buffer((std::complex*)in, n); auto bufo = make_buffer((std::complex*)out, n); ONEMKL_DFT_DISPATCH(desc->ptr, compute_backward(*d, bufi, bufo)); } else { auto bufi = make_buffer((std::complex*)in, n); auto bufo = make_buffer((std::complex*)out, n); ONEMKL_DFT_DISPATCH(desc->ptr, compute_backward(*d, bufi, bufo)); } } return 0; } catch (...) { return -1; } } #undef ONEMKL_DFT_DISPATCH #undef ONEMKL_DFT_DISPATCH_CFG // Introspection helper: capture integral values of config_param enums that we // rely upon in the Julia layer. We enumerate the sequence present in our C // header; if oneMKL's internal ordering diverges this will expose it. 
int onemklDftQueryParamIndices(int64_t *out, int64_t n) { if (!out || n < 20) return -2; // we expose 20 params currently try { #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdeprecated-declarations" #elif defined(__GNUC__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" #endif config_param params[] = { config_param::FORWARD_DOMAIN, config_param::DIMENSION, config_param::LENGTHS, config_param::PRECISION, config_param::FORWARD_SCALE, config_param::BACKWARD_SCALE, config_param::NUMBER_OF_TRANSFORMS, config_param::COMPLEX_STORAGE, config_param::PLACEMENT, config_param::INPUT_STRIDES, config_param::OUTPUT_STRIDES, config_param::FWD_DISTANCE, config_param::BWD_DISTANCE, config_param::WORKSPACE, config_param::WORKSPACE_ESTIMATE_BYTES, config_param::WORKSPACE_BYTES, config_param::FWD_STRIDES, config_param::BWD_STRIDES, config_param::WORKSPACE_PLACEMENT, config_param::WORKSPACE_EXTERNAL_BYTES }; #if defined(__clang__) #pragma clang diagnostic pop #elif defined(__GNUC__) #pragma GCC diagnostic pop #endif for (int i=0;i<20;i++) out[i] = static_cast(params[i]); return 20; } catch (...) { return -1; } } ================================================ FILE: deps/src/onemkl_dft.h ================================================ #pragma once #include "sycl.h" #include #ifdef __cplusplus extern "C" { #endif // Return codes (negative values indicate errors): // 0 : success // -1 : internal error / exception caught // -2 : invalid argument (null pointer, bad length, etc.) // -3 : invalid descriptor state (e.g. 
uninitialized desc->ptr) or size query failure #define ONEMKL_DFT_STATUS_SUCCESS 0 #define ONEMKL_DFT_STATUS_ERROR -1 #define ONEMKL_DFT_STATUS_INVALID_ARGUMENT -2 #define ONEMKL_DFT_STATUS_BAD_STATE -3 // DFT precision typedef enum { ONEMKL_DFT_PRECISION_SINGLE = 0, ONEMKL_DFT_PRECISION_DOUBLE = 1 } onemklDftPrecision; // DFT domain typedef enum { ONEMKL_DFT_DOMAIN_REAL = 0, ONEMKL_DFT_DOMAIN_COMPLEX = 1 } onemklDftDomain; // Configuration parameters (subset mirrors oneapi::mkl::dft::config_param) typedef enum { ONEMKL_DFT_PARAM_FORWARD_DOMAIN = 0, ONEMKL_DFT_PARAM_DIMENSION, ONEMKL_DFT_PARAM_LENGTHS, ONEMKL_DFT_PARAM_PRECISION, ONEMKL_DFT_PARAM_FORWARD_SCALE, ONEMKL_DFT_PARAM_BACKWARD_SCALE, ONEMKL_DFT_PARAM_NUMBER_OF_TRANSFORMS, ONEMKL_DFT_PARAM_COMPLEX_STORAGE, ONEMKL_DFT_PARAM_PLACEMENT, ONEMKL_DFT_PARAM_INPUT_STRIDES, ONEMKL_DFT_PARAM_OUTPUT_STRIDES, ONEMKL_DFT_PARAM_FWD_DISTANCE, ONEMKL_DFT_PARAM_BWD_DISTANCE, ONEMKL_DFT_PARAM_WORKSPACE, // size query / placement ONEMKL_DFT_PARAM_WORKSPACE_ESTIMATE_BYTES, ONEMKL_DFT_PARAM_WORKSPACE_BYTES, ONEMKL_DFT_PARAM_FWD_STRIDES, ONEMKL_DFT_PARAM_BWD_STRIDES, ONEMKL_DFT_PARAM_WORKSPACE_PLACEMENT, ONEMKL_DFT_PARAM_WORKSPACE_EXTERNAL_BYTES } onemklDftConfigParam; // Configuration values (mirrors oneapi::mkl::dft::config_value) typedef enum { ONEMKL_DFT_VALUE_COMMITTED = 0, ONEMKL_DFT_VALUE_UNCOMMITTED, ONEMKL_DFT_VALUE_COMPLEX_COMPLEX, ONEMKL_DFT_VALUE_REAL_REAL, ONEMKL_DFT_VALUE_INPLACE, ONEMKL_DFT_VALUE_NOT_INPLACE, ONEMKL_DFT_VALUE_WORKSPACE_AUTOMATIC, // internal ONEMKL_DFT_VALUE_ALLOW, ONEMKL_DFT_VALUE_AVOID, ONEMKL_DFT_VALUE_WORKSPACE_INTERNAL, ONEMKL_DFT_VALUE_WORKSPACE_EXTERNAL } onemklDftConfigValue; // Opaque descriptor handle struct onemklDftDescriptor_st; typedef struct onemklDftDescriptor_st *onemklDftDescriptor_t; // Creation / destruction int onemklDftCreate1D(onemklDftDescriptor_t *desc, onemklDftPrecision precision, onemklDftDomain domain, int64_t length); int onemklDftCreateND(onemklDftDescriptor_t 
*desc, onemklDftPrecision precision, onemklDftDomain domain, int64_t dim, const int64_t *lengths); int onemklDftDestroy(onemklDftDescriptor_t desc); // Commit descriptor to a queue int onemklDftCommit(onemklDftDescriptor_t desc, syclQueue_t queue); // Configuration set int onemklDftSetValueInt64(onemklDftDescriptor_t desc, onemklDftConfigParam param, int64_t value); int onemklDftSetValueDouble(onemklDftDescriptor_t desc, onemklDftConfigParam param, double value); int onemklDftSetValueInt64Array(onemklDftDescriptor_t desc, onemklDftConfigParam param, const int64_t *values, int64_t n); int onemklDftSetValueConfigValue(onemklDftDescriptor_t desc, onemklDftConfigParam param, onemklDftConfigValue value); // Configuration get int onemklDftGetValueInt64(onemklDftDescriptor_t desc, onemklDftConfigParam param, int64_t *value); int onemklDftGetValueDouble(onemklDftDescriptor_t desc, onemklDftConfigParam param, double *value); // For array queries pass *n as available length; on return *n has elements written. int onemklDftGetValueInt64Array(onemklDftDescriptor_t desc, onemklDftConfigParam param, int64_t *values, int64_t *n); int onemklDftGetValueConfigValue(onemklDftDescriptor_t desc, onemklDftConfigParam param, onemklDftConfigValue *value); // Compute (USM) in-place/out-of-place. Pointers must reference memory // appropriate for precision/domain. No size checking is performed. int onemklDftComputeForward(onemklDftDescriptor_t desc, void *inout); int onemklDftComputeForwardOutOfPlace(onemklDftDescriptor_t desc, void *in, void *out); int onemklDftComputeBackward(onemklDftDescriptor_t desc, void *inout); int onemklDftComputeBackwardOutOfPlace(onemklDftDescriptor_t desc, void *in, void *out); // Compute (buffer API) variants. Host pointers are wrapped in temporary 1D buffers. 
int onemklDftComputeForwardBuffer(onemklDftDescriptor_t desc, void *inout); int onemklDftComputeForwardOutOfPlaceBuffer(onemklDftDescriptor_t desc, void *in, void *out); int onemklDftComputeBackwardBuffer(onemklDftDescriptor_t desc, void *inout); int onemklDftComputeBackwardOutOfPlaceBuffer(onemklDftDescriptor_t desc, void *in, void *out); // Introspection: write out the integral values of selected config_param enums in // the same order as our public enum declaration above. Returns number written or // a negative error code if n is insufficient or arguments invalid. int onemklDftQueryParamIndices(int64_t *out, int64_t n); #ifdef __cplusplus } #endif ================================================ FILE: deps/src/sycl.cpp ================================================ #include "sycl.hpp" #include // https://github.com/intel/llvm/blob/sycl/sycl/include/sycl/ext/oneapi/backend/level_zero.hpp extern "C" int syclPlatformCreate(syclPlatform_t *obj, ze_driver_handle_t driver) { auto sycl_platform = sycl::make_platform(driver); *obj = new syclPlatform_st({sycl_platform}); return 0; } extern "C" int syclPlatformDestroy(syclPlatform_t obj) { delete obj; return 0; } extern "C" int syclDeviceCreate(syclDevice_t *obj, syclPlatform_t platform, ze_device_handle_t device) { auto sycl_device = sycl::make_device(device); *obj = new syclDevice_st({sycl_device}); return 0; } extern "C" int syclDeviceDestroy(syclDevice_t obj) { delete obj; return 0; } extern "C" int syclContextCreate(syclContext_t *obj, syclDevice_t *devices, size_t ndevices, ze_context_handle_t context, int keep_ownership) { std::vector sycl_devices(ndevices); for (size_t i = 0; i < ndevices; i++) sycl_devices[i] = devices[i]->val; auto sycl_ownership = keep_ownership ? 
sycl::ext::oneapi::level_zero::ownership::keep : sycl::ext::oneapi::level_zero::ownership::transfer; sycl::backend_input_t sycl_context_input = {context, sycl_devices, sycl_ownership}; auto sycl_context = sycl::make_context( sycl_context_input); *obj = new syclContext_st({sycl_context}); return 0; } extern "C" int syclContextDestroy(syclContext_t obj) { delete obj; return 0; } extern "C" int syclQueueCreate(syclQueue_t *obj, syclContext_t context, syclDevice_t device, ze_command_queue_handle_t queue, int keep_ownership) { auto sycl_ownership = keep_ownership ? sycl::ext::oneapi::level_zero::ownership::keep : sycl::ext::oneapi::level_zero::ownership::transfer; auto sycl_queue_input = sycl::backend_input_t{queue, device->val, sycl_ownership}; auto sycl_queue = sycl::make_queue( sycl_queue_input, context->val); *obj = new syclQueue_st({sycl_queue}); return 0; } extern "C" int syclQueueDestroy(syclQueue_t obj) { delete obj; return 0; } extern "C" int syclQueueWait(syclQueue_t obj) { obj->val.wait(); return 0; } extern "C" int syclEventCreate(syclEvent_t *obj, syclContext_t context, ze_event_handle_t event, int keep_ownership) { auto sycl_ownership = keep_ownership ? 
sycl::ext::oneapi::level_zero::ownership::keep : sycl::ext::oneapi::level_zero::ownership::transfer; auto sycl_event_input = sycl::backend_input_t{event, sycl_ownership}; auto sycl_event = sycl::make_event( sycl_event_input, context->val); *obj = new syclEvent_st({sycl_event}); return 0; } extern "C" int syclEventDestroy(syclEvent_t obj) { delete obj; return 0; } ================================================ FILE: deps/src/sycl.h ================================================ #pragma once #include #include #ifdef __cplusplus extern "C" { #endif typedef struct syclPlatform_st *syclPlatform_t; int syclPlatformCreate(syclPlatform_t *obj, ze_driver_handle_t driver); int syclPlatformDestroy(syclPlatform_t obj); typedef struct syclDevice_st *syclDevice_t; int syclDeviceCreate(syclDevice_t *obj, syclPlatform_t platform, ze_device_handle_t device); int syclDeviceDestroy(syclDevice_t obj); typedef struct syclContext_st *syclContext_t; int syclContextCreate(syclContext_t *obj, syclDevice_t *devices, size_t ndevices, ze_context_handle_t context, int keep_ownership); int syclContextDestroy(syclContext_t obj); typedef struct syclQueue_st *syclQueue_t; int syclQueueCreate(syclQueue_t *obj, syclContext_t context, syclDevice_t device, ze_command_queue_handle_t queue, int keep_ownership); int syclQueueDestroy(syclQueue_t obj); int syclQueueWait(syclQueue_t obj); typedef struct syclEvent_st *syclEvent_t; int syclEventCreate(syclEvent_t *obj, syclContext_t context, ze_event_handle_t event, int keep_ownership); int syclEventDestroy(syclEvent_t obj); #ifdef __cplusplus } #endif ================================================ FILE: deps/src/sycl.hpp ================================================ #pragma once #include "sycl.h" #include struct syclPlatform_st { sycl::platform val; }; struct syclDevice_st { sycl::device val; }; struct syclContext_st { sycl::context val; }; struct syclQueue_st { sycl::queue val; }; struct syclEvent_st { sycl::event val; }; 
================================================
FILE: docs/Project.toml
================================================
[deps]
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"

[sources]
# Use a path relative to this docs/ directory so the documentation
# environment resolves from any checkout (the previous absolute path
# "/home/michel/git/oneAPI.jl" only worked on the original author's machine).
oneAPI = {path = ".."}

[compat]
Documenter = "1"


================================================
FILE: docs/make.jl
================================================
# Build script for the oneAPI.jl documentation (Documenter.jl).
using Pkg
Pkg.develop(PackageSpec(path=joinpath(dirname(@__FILE__), "..")))
# when first running, instantiate the docs environment
Pkg.instantiate()

using Documenter
using Documenter.Remotes
using oneAPI

# Print package/driver versions into the build log for debuggability.
oneAPI.versioninfo()

makedocs(
    sitename = "oneAPI.jl",
    format = Documenter.HTML(
        prettyurls = Base.get(ENV, "CI", nothing) == "true",
        # The canonical URL must point at this package's own hosted docs.
        # It previously pointed at ExaPF.jl's docs — a copy-paste leftover
        # that would make search engines index the wrong site.
        canonical = "https://juliagpu.github.io/oneAPI.jl/stable/",
        mathengine = Documenter.KaTeX(),
    ),
    modules = [oneAPI],
    pages = [
        "Home" => "index.md",
        "Installation" => "installation.md",
        "Getting Started" => "getting_started.md",
        "Usage" => [
            "Array Programming" => "arrays.md",
            "Kernel Programming" => "kernels.md",
            "Memory Management" => "memory.md",
            "Device Intrinsics" => "device.md",
            "Performance Guide" => "usage/performance.md",
        ],
        "API Reference" => [
            "Overview" => "api.md",
            "Context & Device Management" => "api/context.md",
            "Array Operations" => "api/arrays.md",
            "Kernel Programming" => "api/kernels.md",
            "Memory Management" => "api/memory.md",
            "Compiler & Reflection" => "api/compiler.md",
            "Level Zero (oneL0)" => "level_zero.md",
            "oneMKL" => "onemkl.md",
        ],
        "Troubleshooting" => "troubleshooting.md",
    ],
    checkdocs = :none, # Don't error on missing docstrings
    warnonly = [:cross_references, :missing_docs], # Only warn, don't error
)

deploydocs(
    repo = "github.com/JuliaGPU/oneAPI.jl.git",
    target = "build",
    devbranch = "master",
    devurl = "dev",
    push_preview = true,
)


================================================
FILE: docs/src/api/arrays.md
================================================
# Array Operations

This page documents the
array types and operations provided by oneAPI.jl. ## Array Types ### Host-Side Arrays #### `oneArray{T,N,B}` N-dimensional dense array type for Intel GPU programming using oneAPI and Level Zero. **Type Parameters:** - `T`: Element type (must be stored inline, no isbits-unions) - `N`: Number of dimensions - `B`: Buffer type, one of: - `oneL0.DeviceBuffer`: GPU device memory (default, not CPU-accessible) - `oneL0.SharedBuffer`: Unified shared memory (CPU and GPU accessible) - `oneL0.HostBuffer`: Pinned host memory (CPU-accessible, GPU-visible) **Type Aliases:** - `oneVector{T}` = `oneArray{T,1}` - 1D array - `oneMatrix{T}` = `oneArray{T,2}` - 2D array - `oneVecOrMat{T}` = `Union{oneVector{T}, oneMatrix{T}}` - 1D or 2D array ### Device-Side Arrays #### `oneDeviceArray{T,N,A}` Device-side array type for use within GPU kernels. This type represents a view of GPU memory accessible within kernel code. Unlike `oneArray` which is used on the host, `oneDeviceArray` is designed for device-side operations and cannot be directly constructed on the host. **Type Parameters:** - `T`: Element type - `N`: Number of dimensions - `A`: Address space (typically `AS.CrossWorkgroup` for global memory) **Type Aliases:** - `oneDeviceVector` = `oneDeviceArray{T,1}` - 1D device array - `oneDeviceMatrix` = `oneDeviceArray{T,2}` - 2D device array #### `oneLocalArray(::Type{T}, dims)` Allocate local (workgroup-shared) memory within a GPU kernel. Local memory is shared among all work-items in a workgroup and provides faster access than global memory. ## Memory Type Queries ### `is_device(a::oneArray) -> Bool` Check if the array is stored in device memory (not directly CPU-accessible). ### `is_shared(a::oneArray) -> Bool` Check if the array is stored in shared (unified) memory, accessible from both CPU and GPU. ### `is_host(a::oneArray) -> Bool` Check if the array is stored in pinned host memory, which resides on the CPU but is visible to the GPU. 
## Array Construction `oneArray` supports multiple construction patterns similar to standard Julia arrays: ```julia using oneAPI # Uninitialized arrays a = oneArray{Float32}(undef, 100) b = oneArray{Float32,2}(undef, 10, 10) # Specify memory type c = oneArray{Float32,1,oneL0.SharedBuffer}(undef, 100) # Shared memory d = oneArray{Float32,1,oneL0.HostBuffer}(undef, 100) # Host memory # From existing arrays e = oneArray(rand(Float32, 100)) f = oneArray([1, 2, 3, 4]) # Using zeros/ones/rand g = oneAPI.zeros(Float32, 100) h = oneAPI.ones(Float32, 100) i = oneAPI.rand(Float32, 100) # Do-block for automatic cleanup result = oneArray{Float32}(100) do arr arr .= 1.0f0 sum(arr) # Returns result, arr is freed automatically end ``` ## Array Operations `oneArray` implements the full `AbstractArray` interface and supports: ### Broadcasting ```julia a = oneArray(rand(Float32, 100)) b = oneArray(rand(Float32, 100)) c = a .+ b # Element-wise addition d = a .* 2.0f0 # Scalar multiplication e = sin.(a) # Unary operations f = a .+ b .* c # Fused operations ``` ### Reductions ```julia a = oneArray(rand(Float32, 100)) s = sum(a) # Sum p = prod(a) # Product m = maximum(a) # Maximum n = minimum(a) # Minimum μ = mean(a) # Mean (requires Statistics) ``` ### Mapping ```julia a = oneArray(rand(Float32, 100)) b = map(x -> x^2, a) # Apply function c = map(+, a, b) # Binary operation ``` ### Accumulation ```julia a = oneArray([1, 2, 3, 4]) b = cumsum(a) # Cumulative sum: [1, 3, 6, 10] c = cumprod(a) # Cumulative product: [1, 2, 6, 24] ``` ### Finding Elements ```julia a = oneArray([1.0f0, -2.0f0, 3.0f0, -4.0f0]) indices = findall(x -> x > 0, a) # Indices of positive elements ``` ### Random Number Generation ```julia using oneAPI, Random # Uniform distribution a = oneAPI.rand(Float32, 100) b = oneAPI.rand(Float32, 10, 10) # Normal distribution c = oneAPI.randn(Float32, 100) # With seed Random.seed!(1234) d = oneAPI.rand(Float32, 100) ``` ## Data Transfer ### CPU to GPU ```julia # Using 
constructor
h_array = rand(Float32, 100)
d_array = oneArray(h_array)

# Using copyto!
d_array = oneArray{Float32}(undef, 100)
copyto!(d_array, h_array)
```

### GPU to CPU

```julia
# Using Array constructor
h_array = Array(d_array)

# Using copyto!
h_array = Vector{Float32}(undef, 100)
copyto!(h_array, d_array)
```

### GPU to GPU

```julia
d_array1 = oneArray(rand(Float32, 100))
d_array2 = similar(d_array1)
copyto!(d_array2, d_array1)
```

## Memory Types Comparison

| Memory Type | CPU Access | GPU Access | Performance | Use Case |
|-------------|-----------|------------|-------------|----------|
| Device (default) | ❌ No | ✅ Fast | Fastest | GPU computations |
| Shared | ✅ Yes | ✅ Good | Good | CPU-GPU data sharing |
| Host | ✅ Yes | ✅ Slower | Moderate | Staging, pinned buffers |

```julia
# Device memory (default, fastest for GPU)
a = oneArray{Float32}(undef, 100)

# Shared memory (CPU and GPU accessible)
b = oneArray{Float32,1,oneL0.SharedBuffer}(undef, 100)

# Host memory (CPU memory visible to GPU)
c = oneArray{Float32,1,oneL0.HostBuffer}(undef, 100)

# Query memory type
is_device(a) # true
is_shared(b) # true
is_host(c) # true
```

## Views and Slicing

`oneArray` supports array views for efficient sub-array operations without copying:

```julia
a = oneArray(rand(Float32, 100))

# Create a view
v = view(a, 1:50)
v .= 0.0f0 # Modifies first 50 elements of a

# Indexing with a range follows Base semantics and returns a copy
s = a[1:50] # This is a copy; use `view` or `@views` to avoid copying
```

## Reshaping

```julia
a = oneArray(rand(Float32, 100))

# Reshape to 2D
b = reshape(a, 10, 10)

# Flatten
c = vec(b) # Returns 1D view
```

## Advanced: Custom Array Wrappers

For advanced use cases, oneAPI.jl provides type aliases for array wrappers:

- `oneDenseArray`: Dense contiguous arrays
- `oneStridedArray`: Arrays with arbitrary strides (including views)
- `oneWrappedArray`: Any array backed by a oneArray

These are useful for writing functions that accept various array types:

```julia
function my_kernel!(a::oneStridedArray{Float32})
    # Accepts
oneArray and views a .+= 1.0f0 end ``` ================================================ FILE: docs/src/api/compiler.md ================================================ # Compiler and Reflection This page documents the compiler interface and code reflection tools for oneAPI.jl. ## Code Reflection oneAPI.jl provides macros for inspecting code generation at various stages: - `@device_code_lowered` - Show lowered IR (desugared Julia code) - `@device_code_typed` - Show type-inferred IR - `@device_code_warntype` - Show type-inferred IR with type stability warnings - `@device_code_llvm` - Show LLVM IR - `@device_code_spirv` - Show SPIR-V assembly - `@device_code` - Show all compilation stages interactively These macros are re-exported from GPUCompiler.jl. See the [GPUCompiler documentation](https://github.com/JuliaGPU/GPUCompiler.jl) for detailed usage. ### `return_type(f, tt) -> Type` Return the inferred return type of function `f` when called with argument types `tt` in a GPU kernel context. 
**Arguments:** - `f`: Function to analyze - `tt`: Tuple type of arguments **Returns:** - Type that `f(args...)` would return where `args::tt` **Example:** ```julia function compute(x::Float32) return x * 2.0f0 end rt = oneAPI.return_type(compute, Tuple{Float32}) @assert rt == Float32 ``` ## Inspecting Generated Code Code reflection tools help you understand how your Julia code is compiled to GPU code: ### LLVM IR View the LLVM intermediate representation: ```julia using oneAPI function kernel(a, b) i = get_global_id() @inbounds a[i] = b[i] + 1.0f0 return end a = oneArray(zeros(Float32, 10)) b = oneArray(rand(Float32, 10)) @device_code_llvm @oneapi groups=1 items=10 kernel(a, b) ``` ### SPIR-V Assembly View the final SPIR-V assembly that runs on the GPU: ```julia @device_code_spirv @oneapi groups=1 items=10 kernel(a, b) ``` ### Type Inference Check for type instabilities that hurt performance: ```julia @device_code_warntype @oneapi groups=1 items=10 kernel(a, b) ``` ### Type-Inferred IR See the typed intermediate representation: ```julia @device_code_typed @oneapi groups=1 items=10 kernel(a, b) ``` ### Interactive Inspection Use `@device_code` for an interactive menu: ```julia @device_code @oneapi groups=1 items=10 kernel(a, b) # Opens a menu to select which compilation stage to view ``` ## Return Type Inference Query the return type of a kernel: ```julia function compute(x::Float32) return x * 2.0f0 end # Infer return type rt = oneAPI.return_type(compute, Tuple{Float32}) @assert rt == Float32 ``` ## Debugging Type Issues ### Common Type Instability Sources ```julia # ❌ Type instability: Conditional returns different types function bad_kernel(x, flag) if flag return x # Float32 else return 0 # Int end end # ✅ Type stable: Consistent return type function good_kernel(x, flag) if flag return x # Float32 else return 0.0f0 # Float32 end end ``` ### Using @device_code_warntype ```julia function mystery_kernel!(output, input) i = get_global_id() @inbounds output[i] = 
some_complex_function(input[i]) return end # Check for type issues @device_code_warntype @oneapi groups=1 items=10 mystery_kernel!(a, b) # Look for red warnings indicating type instability ``` ## Compilation Options ### Kernel vs Device Function ```julia # Compile as kernel (default for @oneapi) @device_code_llvm @oneapi kernel=true kernel(a, b) # Compile as device function (callable from other kernels) @device_code_llvm @oneapi kernel=false helper_function(x) ``` ### Always Inline Force inlining of device functions: ```julia @oneapi always_inline=true kernel(a, b) ``` ### Custom Kernel Name Specify a custom name for the kernel: ```julia @oneapi name="my_custom_kernel" kernel(a, b) ``` ## Example: Optimizing a Kernel Here's a workflow for optimizing a kernel using reflection tools: ```julia using oneAPI # Initial version function sum_kernel_v1!(result, data) i = get_global_id() if i == 1 sum = 0 for j in 1:length(data) sum += data[j] end result[1] = sum end return end data = oneArray(rand(Float32, 1000)) result = oneArray(zeros(Float32, 1)) # Check for type issues @device_code_warntype @oneapi groups=1 items=1 sum_kernel_v1!(result, data) # Notice: `sum` might be Int instead of Float32! # Fixed version function sum_kernel_v2!(result, data) i = get_global_id() if i == 1 sum = 0.0f0 # Explicitly Float32 for j in 1:length(data) sum += data[j] end result[1] = sum end return end # Verify the fix @device_code_warntype @oneapi groups=1 items=1 sum_kernel_v2!(result, data) # Should be type-stable now! # Check the generated code @device_code_llvm @oneapi groups=1 items=1 sum_kernel_v2!(result, data) ``` ## Profiling For performance profiling, see the [Performance Guide](@ref). ## Troubleshooting ### Compilation Errors If you encounter compilation errors: 1. **Check type stability**: Use `@device_code_warntype` 2. **Inspect LLVM IR**: Use `@device_code_llvm` to see if the issue is in LLVM generation 3. 
**Simplify the kernel**: Comment out sections to isolate the problematic code 4. **Check argument types**: Ensure arguments are GPU-compatible (isbits types) ### SPIR-V Issues If SPIR-V generation fails: 1. **Update dependencies**: Ensure SPIRV-LLVM-Translator is up to date 2. **Check device capabilities**: Some operations require specific hardware features 3. **Reduce complexity**: Very complex kernels might hit compiler limits ### Performance Issues If your kernel is slow: 1. **Profile memory access patterns**: Coalesced access is crucial 2. **Check occupancy**: Are you launching enough work-items? 3. **Minimize barriers**: Synchronization has overhead 4. **Use local memory wisely**: It's faster than global memory but limited in size ================================================ FILE: docs/src/api/context.md ================================================ # Context and Device Management This page documents the API for managing Level Zero drivers, devices, and contexts in oneAPI.jl. ## Overview oneAPI.jl uses task-local state to manage GPU resources. This allows different Julia tasks to work with different drivers, devices, or contexts without interfering with each other. The typical hierarchy is: - **Driver**: Represents a Level Zero driver (usually one per GPU vendor/installation) - **Device**: Represents a physical GPU device - **Context**: Manages resources like memory allocations and command queues ## Driver Management ### `driver() -> ZeDriver` Get the current Level Zero driver for the calling task. If no driver has been explicitly set with `driver!`, returns the first available driver. The driver selection is task-local. ### `driver!(drv::ZeDriver)` Set the current Level Zero driver for the calling task. This also clears the current device selection, as devices are associated with specific drivers. ### `drivers() -> Vector{ZeDriver}` Return a list of all available Level Zero drivers. 
## Device Management ### `device() -> ZeDevice` Get the current Level Zero device for the calling task. If no device has been explicitly set with `device!`, returns the first available device for the current driver. The device selection is task-local. ### `device!(dev::ZeDevice)` / `device!(i::Int)` Set the current Level Zero device for the calling task. Can pass either a device object or a 1-based device index. ### `devices() -> Vector{ZeDevice}` / `devices(drv::ZeDriver)` Return a list of available Level Zero devices. Without arguments, returns devices for the current driver. ## Context Management ### `context() -> ZeContext` Get the current Level Zero context for the calling task. If no context has been explicitly set with `context!`, returns a global context for the current driver. Contexts manage the lifetime of resources like memory allocations and command queues. ### `context!(ctx::ZeContext)` Set the current Level Zero context for the calling task. ## Command Queues ### `global_queue(ctx::ZeContext, dev::ZeDevice) -> ZeCommandQueue` Get the global command queue for the given context and device. This queue is used as the default queue for executing operations. The queue is created with in-order execution flags. ### `synchronize()` Block the host thread until all operations on the global command queue for the current context and device have completed. ## Example Workflow ```julia using oneAPI # List available drivers drv_list = drivers() println("Available drivers: ", length(drv_list)) # Select a specific driver driver!(drv_list[1]) # List devices for current driver dev_list = devices() println("Available devices: ", length(dev_list)) # Select a specific device device!(dev_list[1]) # Get the current context (created automatically) ctx = context() # Perform GPU operations... 
a = oneArray(rand(Float32, 100)) # Wait for all operations to complete synchronize() ``` ## Multi-Device Programming You can use different devices in different Julia tasks: ```julia using oneAPI # Task 1: Use first device Threads.@spawn begin device!(1) a = oneArray(rand(Float32, 100)) # ... operations on device 1 ... end # Task 2: Use second device Threads.@spawn begin device!(2) b = oneArray(rand(Float32, 100)) # ... operations on device 2 ... end ``` ================================================ FILE: docs/src/api/kernels.md ================================================ # Kernel Programming This page documents the kernel programming API for writing custom GPU kernels in oneAPI.jl. ## Kernel Launch ### `@oneapi [kwargs...] kernel(args...)` High-level interface for launching Julia kernels on Intel GPUs using oneAPI. This macro compiles a Julia function to SPIR-V, prepares the arguments, and optionally launches the kernel on the GPU. **Keyword Arguments:** **Macro Keywords (compile-time):** - `launch::Bool=true`: Whether to launch the kernel immediately **Compiler Keywords:** - `kernel::Bool=false`: Whether to compile as a kernel or device function - `name::Union{String,Nothing}=nothing`: Explicit name for the kernel - `always_inline::Bool=false`: Whether to always inline device functions **Launch Keywords (runtime):** - `groups`: Number of workgroups (required). Can be an integer or tuple. - `items`: Number of work-items per workgroup (required). Can be an integer or tuple. - `queue::ZeCommandQueue`: Command queue to submit to (defaults to global queue). ### `zefunction(f, tt; kwargs...)` Compile a Julia function to a Level Zero kernel function. This is the lower-level interface used by `@oneapi`. Returns a callable kernel object. ### `kernel_convert(x)` Convert arguments for kernel execution. This function is called for every argument passed to a kernel, allowing customization of argument conversion. By default, it converts `oneArray` to `oneDeviceArray`. 
## Basic Kernel Example ```julia using oneAPI function vadd_kernel!(a, b, c) i = get_global_id() if i <= length(a) @inbounds c[i] = a[i] + b[i] end return end N = 1024 a = oneArray(rand(Float32, N)) b = oneArray(rand(Float32, N)) c = similar(a) # Launch with 4 workgroups of 256 work-items each @oneapi groups=4 items=256 vadd_kernel!(a, b, c) ``` ## Launch Configuration ### Workgroups and Work-Items The oneAPI execution model is based on: - **Work-items**: Individual threads of execution (analogous to CUDA threads) - **Workgroups**: Groups of work-items that can synchronize and share local memory (analogous to CUDA blocks) ```julia # 1D configuration @oneapi groups=10 items=64 kernel(args...) # 640 work-items total # 2D configuration @oneapi groups=(10, 10) items=(8, 8) kernel(args...) # 6400 work-items total # 3D configuration @oneapi groups=(4, 4, 4) items=(4, 4, 4) kernel(args...) # 4096 work-items total ``` ### Determining Launch Configuration ```julia # For simple element-wise operations N = length(array) items = 256 # Typical workgroup size groups = cld(N, items) # Ceiling division @oneapi groups=groups items=items kernel(array) ``` ### Compile Without Launch You can compile a kernel without launching it: ```julia # Compile the kernel kernel = @oneapi launch=false vadd_kernel!(a, b, c) # Launch later with different configurations kernel(a, b, c; groups=4, items=256) kernel(a, b, c; groups=8, items=128) ``` ## Device Intrinsics Inside GPU kernels, you can use various intrinsics to query execution context and synchronize work-items. 
### Thread Indexing ```julia # Global ID (unique across all work-items) i = get_global_id() # 1D linear index i = get_global_id(0) # X dimension j = get_global_id(1) # Y dimension k = get_global_id(2) # Z dimension # Local ID (within workgroup) local_i = get_local_id() # 1D linear index local_i = get_local_id(0) # X dimension local_j = get_local_id(1) # Y dimension local_k = get_local_id(2) # Z dimension # Workgroup ID group_i = get_group_id(0) # X dimension group_j = get_group_id(1) # Y dimension group_k = get_group_id(2) # Z dimension # Workgroup size local_size = get_local_size() # Total work-items in workgroup local_size_x = get_local_size(0) local_size_y = get_local_size(1) # Global size global_size = get_global_size() # Total work-items global_size_x = get_global_size(0) ``` ### 2D Matrix Example ```julia function matmul_kernel!(C, A, B) # Get 2D indices row = get_global_id(0) col = get_global_id(1) if row <= size(C, 1) && col <= size(C, 2) sum = 0.0f0 for k in 1:size(A, 2) @inbounds sum += A[row, k] * B[k, col] end @inbounds C[row, col] = sum end return end M, N, K = 256, 256, 256 A = oneArray(rand(Float32, M, K)) B = oneArray(rand(Float32, K, N)) C = oneArray{Float32}(undef, M, N) # Launch with 2D configuration items = (16, 16) # 16x16 work-items per workgroup groups = (cld(M, items[1]), cld(N, items[2])) @oneapi groups=groups items=items matmul_kernel!(C, A, B) ``` ### Synchronization ```julia # Barrier: synchronize all work-items in a workgroup barrier() # Memory fences (ensure memory operations are visible) mem_fence() # Both local and global memory local_mem_fence() # Local memory only global_mem_fence() # Global memory only ``` ### Local Memory Local memory (workgroup-shared memory) enables cooperation between work-items: ```julia function optimized_reduction!(result, input) local_id = get_local_id() local_size = get_local_size() # Allocate local memory (shared within workgroup) local_data = oneLocalArray(Float32, 256) # Load into local memory 
@inbounds local_data[local_id] = input[get_global_id()] barrier() # Tree reduction in local memory stride = local_size ÷ 2 while stride > 0 if local_id <= stride @inbounds local_data[local_id] += local_data[local_id + stride] end barrier() stride ÷= 2 end # First work-item writes result if local_id == 1 @inbounds result[get_group_id()] = local_data[1] end return end ``` ### Atomic Operations For thread-safe operations on shared data: ```julia # Atomic add oneAPI.atomic_add!(ptr, value) # Atomic exchange old_value = oneAPI.atomic_xchg!(ptr, new_value) # Atomic compare-and-swap old_value = oneAPI.atomic_cas!(ptr, compare, new_value) # Atomic min/max oneAPI.atomic_min!(ptr, value) oneAPI.atomic_max!(ptr, value) ``` Example histogram kernel: ```julia function histogram_kernel!(hist, data, bins) i = get_global_id() if i <= length(data) @inbounds val = data[i] bin = clamp(floor(Int, val * bins) + 1, 1, bins) oneAPI.atomic_add!(pointer(hist, bin), 1) end return end ``` ## Kernel Restrictions GPU kernels have certain restrictions: 1. **Must return `nothing`**: Kernels cannot return values directly. Use output arrays instead. 2. **No dynamic memory allocation**: Cannot allocate arrays inside kernels 3. **No I/O operations**: Cannot print or write to files (use printf-style debugging with care) 4. **Limited recursion**: Avoid or minimize recursive calls 5. 
**Type stability**: Ensure type-stable code for best performance ```julia # ❌ Bad: Returns a value function bad_kernel(a) return a[1] + 1 end # ✅ Good: Returns nothing, uses output parameter function good_kernel!(result, a) @inbounds result[1] = a[1] + 1 return end ``` ## KernelAbstractions.jl For portable GPU programming across CUDA, AMD, and Intel GPUs, use KernelAbstractions.jl: ```julia using KernelAbstractions using oneAPI @kernel function generic_kernel!(a, b) i = @index(Global) @inbounds a[i] = a[i] + b[i] end a = oneArray(rand(Float32, 100)) b = oneArray(rand(Float32, 100)) backend = get_backend(a) # oneAPIBackend() kernel! = generic_kernel!(backend) kernel!(a, b, ndrange=length(a)) ``` See the [KernelAbstractions.jl documentation](https://juliagpu.github.io/KernelAbstractions.jl/stable/) for more details. ## Debugging Kernels See the [Compiler and Reflection](@ref) page for tools to inspect generated code and debug kernels. ================================================ FILE: docs/src/api/memory.md ================================================ # Memory Management This page documents memory management in oneAPI.jl. ## Memory Operations ### `Base.unsafe_copyto!(ctx::ZeContext, dev::ZeDevice, dst, src, N)` Low-level memory copy operation on the GPU. Copies `N` elements from `src` to `dst` using the specified context and device. Both `src` and `dst` can be either host pointers (`Ptr`) or device pointers (`ZePtr`). !!! warning This is a low-level function. No bounds checking is performed. For safe array copying, use `copyto!` on `oneArray` objects instead. ### `unsafe_fill!(ctx::ZeContext, dev::ZeDevice, ptr, pattern, N)` Low-level memory fill operation on the GPU. Fills `N` elements at `ptr` with the given pattern using the specified context and device. !!! warning This is a low-level function. For safe array operations, use `fill!` on `oneArray` objects instead. 
## Memory Types oneAPI supports three types of memory through Unified Shared Memory (USM): ### Device Memory (Default) Fastest GPU access, not directly accessible from CPU. ```julia # Create array in device memory (default) a = oneArray{Float32}(undef, 1000) @assert is_device(a) # Or explicitly specify b = oneArray{Float32,1,oneL0.DeviceBuffer}(undef, 1000) ``` **Advantages:** - Fastest GPU access - Best for compute-intensive operations **Disadvantages:** - Cannot directly access from CPU - Requires explicit copy to/from CPU **Use when:** Data stays on GPU for multiple operations ### Shared Memory Accessible from both CPU and GPU with automatic migration. ```julia # Create array in shared memory a = oneArray{Float32,1,oneL0.SharedBuffer}(undef, 1000) @assert is_shared(a) # Can access from CPU a[1] = 42.0f0 # Automatic migration to CPU println(a[1]) # Read from CPU # Can use in GPU kernels @oneapi groups=1 items=1000 kernel(a) # Automatic migration to GPU ``` **Advantages:** - Accessible from both CPU and GPU - Unified virtual addressing - Automatic migration **Disadvantages:** - Migration overhead - Slower than device memory for pure GPU work **Use when:** Frequent CPU-GPU data exchange needed ### Host Memory CPU memory that's pinned and visible to GPU. 
```julia # Create array in host memory a = oneArray{Float32,1,oneL0.HostBuffer}(undef, 1000) @assert is_host(a) # Direct CPU access a[1] = 42.0f0 # Can be used by GPU (but slower than device memory) @oneapi groups=1 items=1000 kernel(a) ``` **Advantages:** - Direct CPU access - Pinned memory (faster PCIe transfers) - Good for staging **Disadvantages:** - Slower GPU access than device memory - Uses pinned system memory (limited resource) **Use when:** Staging data for transfer, or CPU needs to write while GPU reads ## Memory Type Comparison | Feature | Device | Shared | Host | |---------|--------|--------|------| | CPU Access | ❌ No | ✅ Yes | ✅ Yes | | GPU Performance | ⭐⭐⭐ Fastest | ⭐⭐ Good | ⭐ Slower | | Migration | Manual | Automatic | Manual | | Use Case | Pure GPU | Mixed CPU/GPU | Staging | ## Memory Allocation and Deallocation ### Automatic Management Julia's garbage collector automatically manages `oneArray` memory: ```julia function allocate_and_compute() a = oneArray(rand(Float32, 1000)) b = oneArray(rand(Float32, 1000)) c = a .+ b return Array(c) # Only c is copied back # a and b will be garbage collected end result = allocate_and_compute() # GPU memory for a and b is freed eventually ``` ### Manual Garbage Collection Force garbage collection to free GPU memory: ```julia # Allocate large arrays a = oneArray(rand(Float32, 10_000_000)) b = oneArray(rand(Float32, 10_000_000)) # Clear references a = nothing b = nothing # Force GC to reclaim GPU memory GC.gc() ``` ### Explicit Freeing Immediately free GPU memory (use with caution): ```julia a = oneArray(rand(Float32, 1000)) # ... use a ... # Explicitly free (dangerous if still in use!) unsafe_free!(a) # a is now invalid - do not use! ``` !!! warning Only use `unsafe_free!` when you're certain the array is no longer needed, including by any pending GPU operations. Prefer letting the GC handle cleanup. 
### Do-Block Pattern Use do-blocks for automatic cleanup: ```julia result = oneArray{Float32}(1000) do temp # temp is automatically freed when block exits temp .= 1.0f0 sum(temp) # Result is returned end ``` ## Memory Pooling oneAPI.jl uses memory pooling to reduce allocation overhead: ```julia using oneAPI # Allocations are pooled for i in 1:100 a = oneArray(rand(Float32, 1000)) # ... use a ... # Memory is returned to pool, not freed end ``` The pool automatically manages memory reuse, reducing allocation costs. ## Checking Memory Usage Query GPU memory info: ```julia using oneAPI.oneL0 dev = device() props = memory_properties(dev) for prop in props println("Memory size: ", prop.totalSize ÷ (1024^3), " GB") end ``` ## Out of Memory Errors If you encounter out-of-memory errors: ### 1. Reduce Batch Size ```julia # Instead of processing all at once result = process(oneArray(huge_data)) # Process in smaller batches for batch in batches(huge_data, size=1000) result = process(oneArray(batch)) # Process result... end ``` ### 2. Free Unused Arrays ```julia a = oneArray(rand(Float32, 1_000_000)) b = compute(a) # If 'a' is no longer needed unsafe_free!(a) # Continue with 'b' result = process(b) ``` ### 3. Use Shared or Host Memory ```julia # Instead of device memory a = oneArray{Float32}(undef, huge_size) # Use shared memory (can swap to system RAM) a = oneArray{Float32,1,oneL0.SharedBuffer}(undef, huge_size) ``` ### 4. Force Garbage Collection ```julia # After freeing references large_array = nothing GC.gc() # Immediately reclaim GPU memory ``` ### 5. Use Multiple Devices ```julia # Distribute work across devices for (i, dev_id) in enumerate(1:length(devices())) Threads.@spawn begin device!(dev_id) partition = data_partitions[i] a = oneArray(partition) result = compute(a) # ... 
end end ``` ## Low-Level Memory Operations For advanced users, oneL0 provides direct memory management: ```julia using oneAPI.oneL0 ctx = context() dev = device() # Allocate device memory ptr = device_alloc(ctx, dev, 1024, 8) # 1024 bytes, 8-byte aligned # Copy data data = rand(Float32, 256) GC.@preserve data begin unsafe_copyto!(ctx, dev, ptr, pointer(data), 256) end # Free memory free(ctx, ptr) ``` ## Memory Advise and Prefetch Hint to the runtime about memory usage (shared memory only): ```julia using oneAPI.oneL0 a = oneArray{Float32,1,oneL0.SharedBuffer}(undef, 1000) # Advise that this will be read-only on the device # (Implementation depends on Level Zero driver support) # Prefetch to device ctx = context() dev = device() queue = global_queue(ctx, dev) execute!(queue) do list append_prefetch!(list, pointer(a), sizeof(a)) end ``` ## Best Practices 1. **Use device memory by default** for best GPU performance 2. **Use shared memory** when you need CPU access without explicit copies 3. **Use host memory** for staging data or when CPU writes frequently 4. **Let GC handle cleanup** unless you have specific memory pressure 5. **Reuse allocations** within loops when possible 6. **Profile memory usage** to identify bottlenecks 7. **Be cautious with `unsafe_free!`** - use only when you're certain it's safe ## Example: Efficient Memory Usage ```julia using oneAPI function efficient_pipeline(data_batches) # Allocate output buffer once result = oneArray{Float32}(undef, 1000) results = Float32[] for batch in data_batches # Reuse input buffer by copying input = oneArray(batch) # Compute in-place when possible @oneapi groups=4 items=250 process_kernel!(result, input) # Copy result back push!(results, Array(result)...) # Input is freed when loop continues end return results end ``` ================================================ FILE: docs/src/api.md ================================================ # API Reference This page provides an overview of the oneAPI.jl API. 
For detailed documentation, see the specific API reference pages: - [Context & Device Management](api/context.md) - Managing drivers, devices, and contexts - [Array Operations](api/arrays.md) - Working with GPU arrays - [Kernel Programming](api/kernels.md) - Writing and launching custom kernels - [Memory Management](api/memory.md) - Memory allocation and transfer - [Compiler & Reflection](api/compiler.md) - Code generation and introspection ## Core Functions ```@autodocs Modules = [oneAPI] Pages = ["src/context.jl", "src/utils.jl"] Filter = t -> t !== oneAPI.synchronize ``` ## Compiler Functions ```@autodocs Modules = [oneAPI] Pages = ["src/compiler/execution.jl", "src/compiler/reflection.jl"] ``` ## oneL0 (Level Zero) Low-level bindings to the Level Zero API. See the [Level Zero page](level_zero.md) for details. ```@autodocs Modules = [oneAPI.oneL0] Filter = t -> t !== oneAPI.oneL0.synchronize ``` ## oneMKL Intel oneAPI Math Kernel Library bindings. See the [oneMKL page](onemkl.md) for details. ```@autodocs Modules = [oneAPI.oneMKL] ``` ================================================ FILE: docs/src/arrays.md ================================================ # Array Programming oneAPI.jl provides an array type, `oneArray`, which lives on the GPU. It implements the interface defined by `GPUArrays.jl`, allowing for high-level array operations. ## The `oneArray` Type The `oneArray{T,N}` type represents an N-dimensional array with elements of type `T` stored on the GPU. ```julia using oneAPI # Allocate an uninitialized array a = oneArray{Float32}(undef, 1024) # Initialize from a CPU array b = oneArray([1, 2, 3, 4]) # Initialize with zeros/ones z = oneAPI.zeros(Float32, 100) o = oneAPI.ones(Float32, 100) ``` ## Array Operations Since `oneArray` implements the AbstractArray interface, you can use standard Julia array operations. 
```julia a = oneArray(rand(Float32, 10)) b = oneArray(rand(Float32, 10)) c = a .+ b # Element-wise addition d = sum(a) # Reduction e = map(sin, a) # Map ``` ## Data Transfer To move data between the host (CPU) and the device (GPU), use the constructors or `copyto!`. ```julia # CPU to GPU d_a = oneArray(h_a) # GPU to CPU h_a = Array(d_a) ``` ## Backend Agnostic Programming To write code that works on both CPU and GPU (and other backends like CUDA), use the generic array interfaces provided by `GPUArrays.jl`. Avoid hardcoding `oneArray` in your functions; instead, accept `AbstractArray` and let the dispatch system handle the specific implementation. ```julia function generic_add!(a::AbstractArray, b::AbstractArray) a .+= b return a end # Works on CPU generic_add!(rand(10), rand(10)) # Works on Intel GPU generic_add!(oneArray(rand(10)), oneArray(rand(10))) ``` ================================================ FILE: docs/src/device.md ================================================ # Device Intrinsics When writing custom kernels, you have access to a set of device intrinsics that map to underlying hardware instructions. ## Indexing These functions allow you to determine the current thread's position in the execution grid. - `get_global_id(dim=0)`: Global index of the work item. - `get_local_id(dim=0)`: Local index of the work item within the workgroup. - `get_group_id(dim=0)`: Index of the workgroup. - `get_global_size(dim=0)`: Global size of the ND-range. - `get_local_size(dim=0)`: Size of the workgroup. - `get_num_groups(dim=0)`: Number of workgroups. ## Synchronization - `barrier(flags=0)`: Synchronizes all work items in a workgroup. ## Atomics Atomic operations are supported for thread-safe updates to memory. 
- `atomic_add!(ptr, val)` - `atomic_sub!(ptr, val)` - `atomic_inc!(ptr)` - `atomic_dec!(ptr)` - `atomic_min!(ptr, val)` - `atomic_max!(ptr, val)` - `atomic_and!(ptr, val)` - `atomic_or!(ptr, val)` - `atomic_xor!(ptr, val)` - `atomic_cmpxchg!(ptr, cmp, val)` Supported types for atomics generally include `Int32`, `Int64`, `UInt32`, `UInt64`, `Float32`, and `Float64`. ## Math Functions Standard math functions from Julia's `Base` are supported within kernels (e.g., `sin`, `cos`, `exp`, `sqrt`). ================================================ FILE: docs/src/getting_started.md ================================================ # Getting Started ## Basic Usage The most basic usage involves moving data to the GPU using `oneArray` and performing operations on it. ```julia using oneAPI # Create an array on the CPU a = rand(Float32, 1024) # Move it to the GPU d_a = oneArray(a) # Perform operations on the GPU d_b = d_a .+ 1.0f0 # Move the result back to the CPU b = Array(d_b) ``` ## Matrix Multiplication Matrix multiplication is accelerated using the oneMKL library when available. ```julia using oneAPI A = oneArray(rand(Float32, 128, 128)) B = oneArray(rand(Float32, 128, 128)) # This operation runs on the GPU C = A * B ``` ## Writing Kernels For custom operations, you can write kernels using the `@oneapi` macro. ```julia using oneAPI function my_kernel(a, b) i = get_global_id() @inbounds a[i] += b[i] return end a = oneArray(ones(Float32, 1024)) b = oneArray(ones(Float32, 1024)) # Launch the kernel with 1024 items @oneapi items=1024 my_kernel(a, b) ``` See the [Kernel Programming](kernels.md) section for more details. ================================================ FILE: docs/src/index.md ================================================ # oneAPI.jl *Julia support for the oneAPI programming toolkit.* oneAPI.jl provides support for working with the [oneAPI unified programming model](https://software.intel.com/en-us/oneapi). 
The package is currently verified to work with the implementation provided by the [Intel Compute Runtime](https://github.com/intel/compute-runtime), primarily on Linux. ## Writing Portable Code While oneAPI.jl provides specific functionality for Intel GPUs, it is highly recommended to write **backend-agnostic code** whenever possible. This allows your code to run on various hardware backends (NVIDIA, AMD, Intel, Apple) without modification. - **[GPUArrays.jl](https://github.com/JuliaGPU/GPUArrays.jl)**: Use high-level array abstractions that work across different GPU backends. - **[KernelAbstractions.jl](https://github.com/JuliaGPU/KernelAbstractions.jl)**: Use this package for writing kernels that can be compiled for CPU, CUDA, ROCm, and oneAPI devices. Direct use of `oneAPI`-specific macros (like `@oneapi`) and types (like `oneArray`) should be reserved for cases where you need specific optimizations or features not covered by the generic abstractions. ## Features - **High-level Array Abstractions**: `oneArray` type fully implementing the `GPUArrays.jl` interface. - **Kernel Programming**: Execute custom kernels written in Julia on Intel GPUs. - **Level Zero Integration**: Low-level access to the Level Zero API via the `oneL0` submodule. - **oneMKL Support**: Integration with Intel oneMKL for BLAS, LAPACK, and sparse operations. - **SYCL Integration**: Interoperability with SYCL (on Linux). ## Requirements - **Julia**: 1.10 or higher - **OS**: Linux - **Hardware**: Intel Gen9 graphics or newer (including Intel Arc A-Series) ================================================ FILE: docs/src/installation.md ================================================ # Installation ## Requirements oneAPI.jl requires: - **Julia**: 1.10 or higher - **OS**: Linux (recommended) or Windows (experimental via WSL2) - **Hardware**: Intel Gen9 graphics or newer. For Intel Arc GPUs (A580, A750, A770, etc), **Linux 6.2+** is required. 
## Installing oneAPI.jl You can install oneAPI.jl using the Julia package manager: ```julia pkg> add oneAPI ``` This will automatically download the necessary binary dependencies, including: - `oneAPI loader` - `SPIR-V tools` - `Intel Compute Runtime` (if compatible hardware is found) ## Verifying Installation After installation, you can verify that oneAPI.jl is working correctly and detecting your hardware: ```julia julia> using oneAPI julia> oneAPI.versioninfo() ``` The output should list the binary dependencies, toolchain versions, available drivers, and devices. ## Troubleshooting Drivers If no drivers or devices are detected, ensure that you have the correct Intel graphics drivers installed for your system. - On Linux, check if `libze_intel_gpu.so` or similar libraries are available. - On Windows (WSL2), ensure you have the latest Intel graphics drivers installed on the host Windows system and that WSL2 is configured to access the GPU. You can explicitly select drivers and devices if multiple are available: ```julia julia> drivers() julia> devices() julia> device!(1) # Select the first available device ``` ## Using System Libraries (Advanced) !!! warning Using system libraries instead of the provided artifacts is **not recommended** for most users. Only use this approach if you have specialized requirements or custom Intel binaries. By default, oneAPI.jl uses pre-built binary artifacts (JLLs) for the Intel Compute Runtime, oneAPI loader, and related libraries. However, you may need to use system-installed libraries in certain situations: - Custom or newer Intel graphics drivers - Specialized hardware configurations - Development or debugging of the runtime stack - Systems where the artifacts are incompatible ### Configuration Script oneAPI.jl provides a helper script to discover and configure system libraries. From the Julia REPL: ```julia julia> include(joinpath(pkgdir(oneAPI), "res", "local.jl")) ``` This script will: 1. 
Search for Intel libraries on your system: - Intel Graphics Compiler (IGC): `libigc`, `libiga64`, `libigdfcl`, `libopencl-clang` - Graphics Memory Management Library: `libigdgmm` - Intel Compute Runtime (NEO): `libze_intel_gpu`, `libigdrcl` - oneAPI Level Zero Loader: `libze_loader`, `libze_validation_layer` 2. Generate preferences in `LocalPreferences.toml` that override the artifact paths ### Manual Configuration You can also manually set preferences to use specific library paths. Create or edit `LocalPreferences.toml` in your project or global environment: ```toml [NEO_jll] libze_intel_gpu_path = "/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so.1" libigdrcl_path = "/usr/lib/x86_64-linux-gnu/intel-opencl/libigdrcl.so" [libigc_jll] libigc_path = "/usr/lib/x86_64-linux-gnu/libigc.so" libigdfcl_path = "/usr/lib/x86_64-linux-gnu/libigdfcl.so" [gmmlib_jll] libigdgmm_path = "/usr/lib/x86_64-linux-gnu/libigdgmm.so" [oneAPI_Level_Zero_Loader_jll] libze_loader_path = "/usr/lib/x86_64-linux-gnu/libze_loader.so" ``` ### Reverting to Artifacts To revert to the default artifact binaries, simply delete the oneAPI-related entries from `LocalPreferences.toml` (or delete the entire file if it only contains these preferences). ### Common Locations System libraries are typically installed in: **Ubuntu/Debian:** - `/usr/lib/x86_64-linux-gnu/` - `/usr/lib/x86_64-linux-gnu/intel-opencl/` **Fedora/RHEL:** - `/usr/lib64/` - `/usr/lib64/intel-opencl/` **Custom Intel oneAPI installation:** - `/opt/intel/oneapi/compiler/latest/linux/lib/` - `/opt/intel/oneapi/compiler/latest/linux/lib/x64/` ### Verifying System Library Configuration After configuring system libraries, restart Julia and verify the configuration: ```julia julia> using oneAPI julia> oneAPI.versioninfo() ``` Check that the reported library paths match your system libraries. If issues arise, examine the `LocalPreferences.toml` file and ensure all paths are correct and the libraries are compatible with each other. 
================================================ FILE: docs/src/kernels.md ================================================ # Kernel Programming For maximum performance or custom operations not covered by high-level array abstractions, you can write custom kernels in Julia that execute on the GPU. ## The `@oneapi` Macro The `@oneapi` macro is used to launch a kernel on the device. It takes configuration arguments like the number of items (threads) and groups (blocks). ```julia using oneAPI function kernel(a, b) i = get_global_id() if i <= length(a) @inbounds a[i] += b[i] end return end a = oneArray(rand(Float32, 100)) b = oneArray(rand(Float32, 100)) # Launch configuration items = 100 groups = 1 @oneapi items=items groups=groups kernel(a, b) ``` ## KernelAbstractions.jl For portable kernel programming, it is highly recommended to use [KernelAbstractions.jl](https://github.com/JuliaGPU/KernelAbstractions.jl). This allows you to write kernels that work on CPU, CUDA, ROCm, and oneAPI. ```julia using KernelAbstractions, oneAPI @kernel function my_kernel!(a, b) i = @index(Global, Linear) @inbounds a[i] += b[i] end # Get the backend backend = get_backend(a) # Instantiate the kernel k = my_kernel!(backend) # Launch with configuration k(a, b; ndrange=length(a)) ``` ## Device Intrinsics Inside a kernel, you can use various intrinsics to interact with the hardware: - `get_global_id()`: Get the global thread ID. - `get_local_id()`: Get the local thread ID within a workgroup. - `get_group_id()`: Get the workgroup ID. - `barrier()`: Synchronize threads within a workgroup. These correspond to standard OpenCL/Level Zero intrinsics. ================================================ FILE: docs/src/level_zero.md ================================================ # Level Zero Interface The `oneL0` submodule provides low-level access to the Level Zero API, which gives you fine-grained control over the hardware. 
## Drivers and Devices You can enumerate available drivers and devices: ```julia using oneAPI.oneL0 # Get available drivers drvs = drivers() # Get devices for a driver devs = devices(first(drvs)) # Inspect device properties props = compute_properties(first(devs)) println("Max workgroup size: ", props.maxTotalGroupSize) ``` ## Contexts and Queues Manage contexts and command queues for executing operations: ```julia # Create a context ctx = ZeContext(first(drvs)) # Create a command queue queue = ZeCommandQueue(ctx, first(devs)) # Execute a command list execute!(queue) do list append_barrier!(list) end ``` ## Memory Operations You can perform low-level memory operations using command lists: ```julia execute!(queue) do list append_copy!(list, dst_ptr, src_ptr, size) end ``` ================================================ FILE: docs/src/memory.md ================================================ # Memory Management Efficient memory management is crucial for GPU programming. oneAPI.jl provides tools to manage device memory allocation and data transfer. ## Unified Shared Memory (USM) oneAPI uses Unified Shared Memory, which allows for pointers that can be accessible from both the host and the device, or specific to one. - **Device Memory**: Accessible only by the device. Fastest access for kernels. - **Host Memory**: Accessible by the host and device. - **Shared Memory**: Automatically migrated between host and device. `oneArray` typically uses device memory for performance. ## Allocation You can perform low-level memory allocation using the `oneL0` submodule if needed, though `oneArray` handles this automatically. ```julia using oneAPI.oneL0 # Allocate device memory ptr = oneL0.zeMemAllocDevice(context(), device(), 1024, 1) # Free memory oneL0.zeMemFree(context(), ptr) ``` ## Garbage Collection Julia's garbage collector automatically manages `oneArray` objects. However, GPU memory is a limited resource. 
If you are running into out-of-memory errors, you might need to manually trigger garbage collection or free arrays. ```julia a = oneArray(rand(Float32, 1024*1024*100)) a = nothing GC.gc() # Reclaim memory ``` ## Explicit Freeing For immediate memory release, you can use `unsafe_free!`: ```julia using oneAPI a = oneArray(rand(1024)) oneAPI.unsafe_free!(a) ``` **Warning**: Only use `unsafe_free!` if you are sure the array is no longer used, including by any pending GPU operations. ================================================ FILE: docs/src/onemkl.md ================================================ # oneMKL Integration oneAPI.jl provides bindings to the Intel oneMKL library, enabling high-performance linear algebra operations on Intel GPUs. ## Dense Linear Algebra (BLAS/LAPACK) Standard BLAS and LAPACK operations are automatically accelerated when using `oneArray`. ```julia using oneAPI, LinearAlgebra A = oneArray(rand(Float32, 100, 100)) B = oneArray(rand(Float32, 100, 100)) # Matrix multiplication (GEMM) C = A * B # Linear solve (AX = B) X = A \ B ``` ## Sparse Linear Algebra oneAPI.jl supports sparse matrix operations via oneMKL's sparse BLAS functionality. These integrate with Julia's `SparseArrays` standard library. ```julia using oneAPI, oneAPI.oneMKL, SparseArrays, LinearAlgebra # Create a sparse matrix on CPU A = sprand(100, 100, 0.1) # Move to GPU (converts to oneMKL format) dA = oneMKL.oneSparseMatrixCSC(A) # Create a dense vector x = oneArray(rand(100)) # Sparse matrix-vector multiplication y = dA * x ``` Note that `oneSparseMatrixCSC` is available for Compressed Sparse Column format, which is the standard in Julia. ## FFTs Fast Fourier Transforms are supported through `AbstractFFTs.jl` interface integration with oneMKL DFTs. 
```julia using oneAPI, FFTW a = oneArray(rand(ComplexF32, 1024)) # Forward FFT b = fft(a) # Inverse FFT c = ifft(b) ``` ================================================ FILE: docs/src/troubleshooting.md ================================================ # Troubleshooting ## Common Issues ### No devices detected **Symptom**: `oneAPI.devices()` returns an empty list. **Solution**: 1. Ensure you are running on Linux (recommended) or WSL2. 2. Check if the Intel Compute Runtime is installed and accessible. 3. Verify your user has permissions to access the GPU render device (usually `render` group). 4. Run `oneAPI.versioninfo()` to see detailed diagnostic information. ### "Double type is not supported" **Symptom**: Kernel compilation fails with an error about `Float64` or `Double` support. **Solution**: Some Intel GPUs (especially integrated graphics) lack native hardware support for 64-bit floating point operations. - Use `Float32` instead of `Float64`. - Check support with: ```julia using oneAPI.oneL0 oneL0.module_properties(device()).fp64flags & oneL0.ZE_DEVICE_MODULE_FLAG_FP64 != 0 ``` ### "Out of memory" errors **Symptom**: Memory allocation fails. **Solution**: - Trigger garbage collection: `GC.gc()`. - Manually free unused arrays: `oneAPI.unsafe_free!(array)`. - Check if you are exceeding the device's memory capacity. ## Debugging ### Validation Layer Enable the Level Zero validation layer to catch API misuse: ```bash export ZE_ENABLE_VALIDATION_LAYER=1 export ZE_ENABLE_PARAMETER_VALIDATION=1 ``` ### Debug Mode Enable debug mode in oneAPI.jl to use debug builds of underlying toolchains (if available): ```julia oneAPI.set_debug!(true) ``` ================================================ FILE: docs/src/usage/performance.md ================================================ # Performance Guide This guide provides tips and techniques for optimizing oneAPI.jl applications. ## Quick Wins ### 1. 
Use Device Memory Device memory is fastest for GPU operations: ```julia # ✅ Good: Device memory (default) a = oneArray{Float32}(undef, 1000) # ❌ Slower: Shared memory (unless CPU access is needed) a = oneArray{Float32,1,oneL0.SharedBuffer}(undef, 1000) ``` ### 2. Minimize Data Transfers Keep data on GPU between operations: ```julia # ❌ Bad: Unnecessary transfers for i in 1:100 cpu_data = Array(gpu_array) # GPU → CPU cpu_data .+= 1 gpu_array = oneArray(cpu_data) # CPU → GPU end # ✅ Good: Keep data on GPU for i in 1:100 gpu_array .+= 1 # All on GPU end ``` ### 3. Use Fused Operations Broadcasting automatically fuses operations: ```julia # ❌ Slower: Multiple kernel launches a = oneArray(rand(Float32, 1000)) b = sin.(a) c = b .+ 1.0f0 d = c .* 2.0f0 # ✅ Faster: Single fused kernel d = 2.0f0 .* (sin.(a) .+ 1.0f0) ``` ### 4. Specify Float32 GPUs are typically optimized for single precision: ```julia # ❌ Slower: Float64 (if not needed) a = oneArray(rand(Float64, 1000)) # ✅ Faster: Float32 a = oneArray(rand(Float32, 1000)) ``` ## Kernel Optimization ### Launch Configuration Choose appropriate workgroup sizes: ```julia # Typical good workgroup sizes items = 256 # Common choice, adjust based on hardware items = 128 # Try smaller if using lots of local memory items = 512 # Try larger for simple kernels # Calculate groups N = length(array) groups = cld(N, items) # Ceiling division @oneapi groups=groups items=items kernel(array) ``` ### Memory Access Patterns Coalesced memory access is crucial for performance: ```julia # ✅ Good: Coalesced access (consecutive threads access consecutive memory) function good_kernel!(output, input) i = get_global_id() @inbounds output[i] = input[i] + 1.0f0 return end # ❌ Bad: Strided access (cache inefficient) function bad_kernel!(output, input, stride) i = get_global_id() @inbounds output[i] = input[i * stride] + 1.0f0 return end ``` ### Use Local Memory Local memory is faster than global memory for data reuse: ```julia function 
optimized_reduction!(result, input) local_id = get_local_id() local_size = get_local_size() group_id = get_group_id() # Allocate local memory local_mem = oneLocalArray(Float32, 256) # Load global → local (coalesced) global_id = get_global_id() @inbounds local_mem[local_id] = input[global_id] barrier() # Reduce in local memory (much faster) stride = local_size ÷ 2 while stride > 0 if local_id <= stride @inbounds local_mem[local_id] += local_mem[local_id + stride] end barrier() stride ÷= 2 end # Write result if local_id == 1 @inbounds result[group_id] = local_mem[1] end return end ``` ### Minimize Barriers Barriers have overhead: ```julia # ❌ Bad: Unnecessary barriers function wasteful_kernel!(a) i = get_local_id() a[i] += 1 barrier() # Not needed if no data sharing a[i] *= 2 barrier() # Not needed return end # ✅ Good: Barriers only when needed function efficient_kernel!(a, shared) i = get_local_id() # Load to shared memory shared[i] = a[i] barrier() # Needed: ensure all loads complete # Use shared data result = shared[i] + shared[i+1] a[i] = result return end ``` ### Avoid Divergence Minimize thread divergence (different execution paths): ```julia # ❌ Bad: High divergence function divergent_kernel!(a) i = get_global_id() if i % 32 == 0 # Only 1 in 32 threads executes this @inbounds a[i] = expensive_computation(a[i]) else @inbounds a[i] += 1.0f0 end return end # ✅ Better: Separate into different kernels function uniform_kernel!(a) i = get_global_id() @inbounds a[i] += 1.0f0 return end function sparse_kernel!(a, indices) i = get_global_id() if i <= length(indices) idx = indices[i] @inbounds a[idx] = expensive_computation(a[idx]) end return end ``` ## Type Stability Type instability severely hurts performance: ```julia # ❌ Bad: Type unstable function unstable_kernel!(output, input, flag) i = get_global_id() if flag value = input[i] # Float32 else value = 0 # Int end output[i] = value * 2 # Type uncertain! 
return end # ✅ Good: Type stable function stable_kernel!(output, input, flag) i = get_global_id() if flag value = input[i] # Float32 else value = 0.0f0 # Float32 end output[i] = value * 2.0f0 # All Float32! return end # Check type stability @device_code_warntype @oneapi groups=1 items=10 stable_kernel!(output, input, true) ``` ## Algorithmic Optimization ### Use Library Functions Leverage optimized library implementations: ```julia using oneAPI, LinearAlgebra # ✅ Good: Use oneMKL through LinearAlgebra A = oneArray(rand(Float32, 1000, 1000)) B = oneArray(rand(Float32, 1000, 1000)) C = A * B # Uses optimized oneMKL # ❌ Bad: Write your own matrix multiplication # (unless you have a very specific use case) ``` ### Choose Right Algorithm Some algorithms parallelize better than others: ```julia # ❌ Sequential algorithm function sequential_sum(arr) sum = 0.0f0 for x in arr sum += x end return sum end # ✅ Parallel reduction result = sum(oneArray(data)) # Optimized parallel reduction ``` ## Benchmarking ### Basic Timing ```julia using BenchmarkTools, oneAPI a = oneArray(rand(Float32, 1000)) b = oneArray(rand(Float32, 1000)) # Warmup c = a .+ b synchronize() # Benchmark @benchmark begin c = $a .+ $b synchronize() end ``` ### Accurate GPU Timing Always synchronize before timing: ```julia using oneAPI a = oneArray(rand(Float32, 1_000_000)) # ❌ Wrong: Doesn't wait for GPU @time a .+= 1 # Only measures kernel launch overhead # ✅ Correct: Wait for GPU to finish @time begin a .+= 1 synchronize() end ``` ### Profiling with Time ```julia function profile_operation(a, b) # Warmup c = a .+ b synchronize() # Time kernel launch t1 = time() c = a .+ b t2 = time() launch_time = t2 - t1 # Time including synchronization synchronize() t3 = time() total_time = t3 - t1 println("Launch: ", launch_time * 1000, " ms") println("Total: ", total_time * 1000, " ms") println("Actual: ", (total_time - launch_time) * 1000, " ms") end a = oneArray(rand(Float32, 10_000_000)) b = oneArray(rand(Float32, 
10_000_000)) profile_operation(a, b) ``` ## Memory Bandwidth ### Theoretical Peak Calculate theoretical bandwidth: ```julia # Example: Intel Iris Xe Graphics # 96 execution units, 1.35 GHz # Memory bandwidth: ~68 GB/s # Your kernel processes N Float32 values N = 10_000_000 bytes_transferred = N * sizeof(Float32) * 2 # Read + Write # Measure time t = @elapsed begin a .+= b synchronize() end bandwidth_achieved = bytes_transferred / t / 1e9 # GB/s println("Bandwidth: ", bandwidth_achieved, " GB/s") ``` ### Improving Bandwidth Utilization ```julia # ✅ Good: Single pass with fusion result = @. a + b * c - d / e # One pass over data # ❌ Bad: Multiple passes result = a .+ b result = result .* c result = result .- d result = result ./ e # Four separate passes over data! ``` ## Common Performance Issues ### Issue 1: Too Many Small Kernels ```julia # ❌ Bad: Many small kernel launches for i in 1:100 a .+= 1 # 100 kernel launches! end # ✅ Good: Single kernel or batching a .+= 100 # Single operation ``` ### Issue 2: Unnecessary Allocations ```julia # ❌ Bad: Allocates temporary c = a .+ b # Allocates new array # ✅ Good: In-place operation c = similar(a) c .= a .+ b # Uses pre-allocated array ``` ### Issue 3: Wrong Number Type ```julia # ❌ Bad: Mixed types a = oneArray(rand(Float32, 1000)) b = a .+ 1.0 # Float64 constant! 
# ✅ Good: Matching types b = a .+ 1.0f0 # Float32 constant ``` ## Performance Checklist - [ ] Using device memory (not shared unless necessary) - [ ] Minimizing CPU-GPU transfers - [ ] Using Float32 (unless Float64 required) - [ ] Fusing operations with broadcasting - [ ] Type-stable kernels (`@device_code_warntype`) - [ ] Appropriate workgroup sizes - [ ] Coalesced memory access - [ ] Minimal thread divergence - [ ] Leveraging local memory for reuse - [ ] Using library functions when available - [ ] Synchronizing before timing - [ ] Avoiding unnecessary allocations ## Hardware-Specific Tuning Different Intel GPUs have different characteristics: ```julia using oneAPI.oneL0 dev = device() props = properties(dev) compute_props = compute_properties(dev) println("Device: ", props.name) println("EU count: ", compute_props.numEUsPerSubslice * compute_props.numSubslicesPerSlice * compute_props.numSlices) println("Max workgroup size: ", compute_props.maxTotalGroupSize) println("Max local memory: ", compute_props.maxSharedLocalMemory, " bytes") # Adjust your code based on these properties ``` ## Advanced: Async Operations For overlapping compute and transfers (advanced users): ```julia using oneAPI.oneL0 ctx = context() dev = device() # Create multiple queues for async operations queue1 = ZeCommandQueue(ctx, dev) queue2 = ZeCommandQueue(ctx, dev) # Launch kernel on queue1 execute!(queue1) do list # ... kernel launch ... 
end # Overlap with transfer on queue2 execute!(queue2) do list append_copy!(list, dst, src, size) end # Synchronize both synchronize(queue1) synchronize(queue2) ``` ## Further Resources - [Intel GPU Architecture](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-gpu-architecture.html) - [oneAPI Programming Guide](https://www.intel.com/content/www/us/en/developer/tools/oneapi/programming-guide.html) - [Level Zero Specification](https://spec.oneapi.io/level-zero/latest/index.html) ================================================ FILE: examples/gemm.jl ================================================ using oneAPI, Test A = oneArray(rand(Float32, 2, 3)) B = oneArray(rand(Float32, 3, 4)) C = A * B @test Array(C) ≈ Array(A) * Array(B) ================================================ FILE: examples/vadd.jl ================================================ using oneAPI, Test function vadd(a, b, c) i = get_global_id() @inbounds c[i] = a[i] + b[i] return end dims = (2,) a = round.(rand(Float32, dims) * 100) b = round.(rand(Float32, dims) * 100) c = similar(a) d_a = oneArray(a) d_b = oneArray(b) d_c = oneArray(c) len = prod(dims) @oneapi items=len vadd(d_a, d_b, d_c) c = Array(d_c) @test a+b ≈ c ================================================ FILE: lib/level-zero/barrier.jl ================================================ export append_barrier!, device_barrier append_barrier!(list::ZeCommandList, signal_event=nothing, wait_events::ZeEvent...) = zeCommandListAppendBarrier(list, something(signal_event, C_NULL), length(wait_events), [wait_events...]) device_barrier(dev::ZeDevice) = zeDeviceSystemBarrier(dev) ================================================ FILE: lib/level-zero/cmdlist.jl ================================================ # list export ZeCommandList, execute! 
mutable struct ZeCommandList
    handle::ze_command_list_handle_t
    context::ZeContext
    device::ZeDevice

    function ZeCommandList(ctx::ZeContext, dev::ZeDevice, ordinal=1; flags=0)
        # Julia uses a 1-based group ordinal; the driver expects 0-based.
        desc_ref = Ref(ze_command_list_desc_t(;
            commandQueueGroupOrdinal=ordinal-1,
            flags,
        ))
        handle_ref = Ref{ze_command_list_handle_t}()
        zeCommandListCreate(ctx, dev, desc_ref, handle_ref)

        obj = new(handle_ref[], ctx, dev)
        # Destroy the underlying driver handle when the object is collected.
        finalizer(obj) do obj
            zeCommandListDestroy(obj)
        end
        obj
    end
end

Base.unsafe_convert(::Type{ze_command_list_handle_t}, list::ZeCommandList) = list.handle

# Equality and hashing follow the underlying driver handle.
Base.:(==)(a::ZeCommandList, b::ZeCommandList) = a.handle == b.handle
Base.hash(e::ZeCommandList, h::UInt) = hash(e.handle, h)

# `close` finishes recording so the list can be executed; `reset` re-opens it.
Base.close(list::ZeCommandList) = zeCommandListClose(list)
Base.reset(list::ZeCommandList) = zeCommandListReset(list)

"""
    ZeCommandList(dev::ZeDevice, ...) do list
        append_...!(list)
    end

Create a command list for device `dev`, passing in a do block that appends
operations. The list is then closed and can be used immediately, e.g. for
execution.
"""
function ZeCommandList(f::Base.Callable, args...; kwargs...)
    list = ZeCommandList(args...; kwargs...)
    f(list)
    close(list)
    return list
end

# Submit one or more (closed) command lists to a queue, optionally
# signaling `fence` upon completion.
execute!(queue::ZeCommandQueue, lists::Vector{ZeCommandList}, fence=nothing) =
    zeCommandQueueExecuteCommandLists(queue, length(lists), lists,
                                      something(fence, C_NULL))

"""
    execute!(queue::ZeCommandQueue, ...) do list
        append_...!(list)
    end

Create a command list for the device that owns `queue`, passing in a do block
that appends operations. The list is then closed and executed on the queue.
"""
function execute!(f::Base.Callable, queue::ZeCommandQueue, fence=nothing; kwargs...)
    list = ZeCommandList(f, queue.context, queue.device, queue.ordinal; kwargs...)
    execute!(queue, [list], fence)
end


================================================
FILE: lib/level-zero/cmdqueue.jl
================================================
# queue

export ZeCommandQueue, synchronize

mutable struct ZeCommandQueue
    handle::ze_command_queue_handle_t
    context::ZeContext
    device::ZeDevice
    ordinal::Int

    function ZeCommandQueue(ctx::ZeContext, dev::ZeDevice, ordinal=1, index=1; flags=0,
                            mode::ze_command_queue_mode_t=ZE_COMMAND_QUEUE_MODE_DEFAULT,
                            priority::ze_command_queue_priority_t=ZE_COMMAND_QUEUE_PRIORITY_NORMAL)
        # Julia uses 1-based ordinals and indices; the driver expects 0-based.
        desc_ref = Ref(ze_command_queue_desc_t(;
            ordinal=ordinal-1,
            index=index-1,
            flags, mode, priority
        ))
        handle_ref = Ref{ze_command_queue_handle_t}()
        zeCommandQueueCreate(ctx, dev, desc_ref, handle_ref)

        obj = new(handle_ref[], ctx, dev, ordinal)
        # Destroy the underlying driver handle when the object is collected.
        finalizer(obj) do obj
            zeCommandQueueDestroy(obj)
        end
        obj
    end
end

Base.unsafe_convert(::Type{ze_command_queue_handle_t}, queue::ZeCommandQueue) = queue.handle

# Equality and hashing follow the underlying driver handle.
Base.:(==)(a::ZeCommandQueue, b::ZeCommandQueue) = a.handle == b.handle
Base.hash(e::ZeCommandQueue, h::UInt) = hash(e.handle, h)

# Block until the queue drains, or until `timeout` expires
# (timeout units per the Level Zero spec -- presumably ns; confirm).
synchronize(queue::ZeCommandQueue, timeout::Number=typemax(UInt64)) =
    zeCommandQueueSynchronize(queue, timeout)


## groups

export command_queue_groups, compute_groups

# Lazy iterator over the command-queue groups of a device.
struct ZeCommandQueueGroups
    device::ZeDevice
end

command_queue_groups(dev::ZeDevice) = ZeCommandQueueGroups(dev)

Base.eltype(::ZeCommandQueueGroups) = ZeCommandQueueGroup
function Base.iterate(groups::ZeCommandQueueGroups, i=1)
    i >= length(groups) + 1 ?
        nothing : (ZeCommandQueueGroup(groups, i), i+1)
end
Base.length(groups::ZeCommandQueueGroups) = length(properties(groups))

# Query the device's command-queue-group properties using the usual
# Level Zero count-then-fill double call.
function properties(groups::ZeCommandQueueGroups)
    count_ref = Ref{UInt32}(0)
    zeDeviceGetCommandQueueGroupProperties(groups.device, count_ref, C_NULL)

    all_props = fill(ze_command_queue_group_properties_t(), count_ref[])
    zeDeviceGetCommandQueueGroupProperties(groups.device, count_ref, all_props)

    return [(flags=props.flags,
             maxMemoryFillPatternSize=UInt(props.maxMemoryFillPatternSize),
             numQueues=Int(props.numQueues),
            ) for props in all_props[1:count_ref[]]]
end
Base.IteratorSize(::ZeCommandQueueGroups) = Base.HasLength()

# A single group, identified by its (1-based) ordinal within the iterator.
struct ZeCommandQueueGroup
    groups::ZeCommandQueueGroups
    ordinal::Int
end

properties(group::ZeCommandQueueGroup) = properties(group.groups)[group.ordinal]

# short-hands

# Groups whose flags indicate support for compute operations.
compute_groups(dev::ZeDevice) = filter(collect(command_queue_groups(dev))) do group
    properties(group).flags & oneL0.ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE != 0
end


================================================
FILE: lib/level-zero/common.jl
================================================
"""
    ze_make_version(major::Integer, minor::Integer) -> UInt32

32-bit unsigned integer version number from major and minor components.

This should be the Julia equivalent of the C macro:
`#define ZE_MAKE_VERSION( _major, _minor )  (( _major << 16 )|( _minor & 0x0000ffff))`
"""
function ZE_MAKE_VERSION(major::Integer, minor::Integer)
    # Shift the major version 16 bits to the left
    # and combine it with the minor version using a bitwise OR.
    # The `& 0xffff` is implicit for standard integer types when combining,
    # but we can be explicit if needed. The result is cast to UInt32.
    return (UInt32(major) << 16) | (UInt32(minor) & 0x0000ffff)
end

# Inverse of ZE_MAKE_VERSION: unpack a 32-bit version into major/minor.
unmake_version(ver) = VersionNumber(Int(ver) >> 16, Int(ver) & 0x0000ffff)


================================================
FILE: lib/level-zero/context.jl
================================================
export ZeContext, status

mutable struct ZeContext
    handle::ze_context_handle_t
    driver::ZeDriver

    function ZeContext(drv::ZeDriver)
        desc_ref = Ref(ze_context_desc_t())
        handle_ref = Ref{ze_context_handle_t}()
        zeContextCreate(drv, desc_ref, handle_ref)

        obj = new(handle_ref[], drv)
        # Destroy the underlying driver handle when the object is collected.
        finalizer(obj) do obj
            zeContextDestroy(obj)
        end
        obj
    end
end

Base.unsafe_convert(::Type{ze_context_handle_t}, dev::ZeContext) = dev.handle

# Equality and hashing follow the underlying driver handle.
Base.:(==)(a::ZeContext, b::ZeContext) = a.handle == b.handle
Base.hash(e::ZeContext, h::UInt) = hash(e.handle, h)

status(ctx::ZeContext) = zeContextGetStatus(ctx)


================================================
FILE: lib/level-zero/copy.jl
================================================
# copies

export append_copy!, append_fill!, append_prefetch!, append_advise!

# Append an asynchronous copy of `size` bytes from `src` to `dst`, optionally
# signaling `signal_event` on completion and waiting on `wait_events` first.
append_copy!(list::ZeCommandList, dst::Union{Ptr,ZePtr}, src::Union{Ptr,ZePtr},
             size::Integer, signal_event::Union{ZeEvent,Nothing}=nothing,
             wait_events::ZeEvent...) =
    zeCommandListAppendMemoryCopy(list, dst, src, size,
                                  something(signal_event, C_NULL),
                                  length(wait_events), [wait_events...])

# Append a fill of `size` bytes at `ptr` with a repeated pattern of
# `pattern_size` bytes.
append_fill!(list::ZeCommandList, ptr::Union{Ptr,ZePtr}, pattern::Union{Ptr,ZePtr},
             pattern_size::Integer, size::Integer,
             signal_event::Union{ZeEvent,Nothing}=nothing, wait_events::ZeEvent...)
    = zeCommandListAppendMemoryFill(list, ptr, pattern, pattern_size, size,
                                    something(signal_event, C_NULL),
                                    length(wait_events), [wait_events...])

# Append a prefetch hint for the `size` bytes at `ptr`.
append_prefetch!(list::ZeCommandList, ptr::Union{Ptr,ZePtr}, size::Integer) =
    zeCommandListAppendMemoryPrefetch(list, ptr, size)

# Append a memory-advise hint for `ptr` on device `dev`.
append_advise!(list::ZeCommandList, dev::ZeDevice, ptr::Union{Ptr,ZePtr},
               size::Integer, advise::ze_memory_advice_t) =
    zeCommandListAppendMemAdvise(list, dev, ptr, size, advise)


================================================
FILE: lib/level-zero/device.jl
================================================
export ZeDevice, properties, compute_properties, module_properties, memory_properties,
       memory_access_properties, cache_properties, image_properties, p2p_properties

struct ZeDevice
    handle::ze_device_handle_t
    driver::ZeDriver

    # only accept handles, don't convert
    ZeDevice(handle::ze_device_handle_t, driver::ZeDriver) = new(handle, driver)
end

Base.unsafe_convert(::Type{ze_device_handle_t}, dev::ZeDevice) = dev.handle

# Equality and hashing follow the underlying driver handle.
Base.:(==)(a::ZeDevice, b::ZeDevice) = a.handle == b.handle
Base.hash(e::ZeDevice, h::UInt) = hash(e.handle, h)

# Compact representation: device type, vendor/device id, optional sub-device id.
function Base.show(io::IO, dev::ZeDevice)
    props = properties(dev)

    print(io, "ZeDevice(")
    if props.type == ZE_DEVICE_TYPE_GPU
        print(io, "GPU")
    elseif props.type == ZE_DEVICE_TYPE_FPGA
        print(io, "FPGA")
    end
    print(io, ", vendor ")
    show(io, props.vendorId)
    print(io, ", device ")
    show(io, props.deviceId)
    if props.subdeviceId !== nothing
        print(io, ", sub-device ")
        show(io, props.subdeviceId)
    end
    print(io, ")")
end

# Verbose representation additionally shows the device name.
function Base.show(io::IO, ::MIME"text/plain", dev::ZeDevice)
    show(io, dev)
    props = properties(dev)
    print(io, ": $(props.name)")
end


## properties

# Query general device properties, repacking the C struct into a NamedTuple
# of plain Julia values.
function properties(dev::ZeDevice)
    props_ref = Ref(ze_device_properties_t())
    zeDeviceGetProperties(dev, props_ref)
    props = props_ref[]

    return (
        type=props.type,
        vendorId=UInt16(props.vendorId),
        deviceId=UInt16(props.deviceId),
        flags=props.flags,
        # subdeviceId is only meaningful when the sub-device flag is set
        subdeviceId=(props.flags&ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE == 0) ?
nothing : props.subdeviceId, coreClockRate=Int(props.coreClockRate), maxMemAllocSize=Int(props.maxMemAllocSize), maxHardwareContexts=Int(props.maxHardwareContexts), maxCommandQueuePriority=Int(props.maxCommandQueuePriority), numThreadsPerEU=Int(props.numThreadsPerEU), physicalEUSimdWidth=Int(props.physicalEUSimdWidth), numEUsPerSubslice=Int(props.numEUsPerSubslice), numSubslicesPerSlice=Int(props.numSubslicesPerSlice), numSlices=Int(props.numSlices), timerResolution=Int(props.timerResolution), timestampValidBits=Int(props.timestampValidBits), kernelTimestampValidBits=Int(props.kernelTimestampValidBits), uuid=Base.UUID(reinterpret(UInt128, [props.uuid.id...])[1]), name=String(UInt8[props.name[1:findfirst(isequal(0), props.name)-1]...]), ) end function compute_properties(dev::ZeDevice) props_ref = Ref(ze_device_compute_properties_t()) zeDeviceGetComputeProperties(dev, props_ref) props = props_ref[] return ( maxTotalGroupSize=Int(props.maxTotalGroupSize), maxGroupSizeX=Int(props.maxGroupSizeX), maxGroupSizeY=Int(props.maxGroupSizeY), maxGroupSizeZ=Int(props.maxGroupSizeZ), maxGroupCountX=Int(props.maxGroupCountX), maxGroupCountY=Int(props.maxGroupCountY), maxGroupCountZ=Int(props.maxGroupCountZ), maxSharedLocalMemory=Int(props.maxSharedLocalMemory), subGroupSizes=Int.(props.subGroupSizes[1:props.numSubGroupSizes]), ) end function module_properties(dev::ZeDevice) props_ref = Ref(ze_device_module_properties_t()) zeDeviceGetModuleProperties(dev, props_ref) props = props_ref[] return ( spirvVersionSupported=props.spirvVersionSupported==0 ? 
nothing : unmake_version(props.spirvVersionSupported), flags=props.flags, fp16flags=props.fp16flags, fp32flags=props.fp32flags, fp64flags=props.fp64flags, maxArgumentsSize=Int(props.maxArgumentsSize), printfBufferSize=Int(props.printfBufferSize), nativeKernelSupported=Base.UUID(reinterpret(UInt128, [props.nativeKernelSupported.id...])[1]), ) end function memory_properties(dev::ZeDevice) count_ref = Ref{UInt32}(0) zeDeviceGetMemoryProperties(dev, count_ref, C_NULL) all_props = fill(ze_device_memory_properties_t(), count_ref[]) zeDeviceGetMemoryProperties(dev, count_ref, all_props) return [(maxClockRate=Int(props.maxClockRate), maxBusWidth=Int(props.maxBusWidth), totalSize=Int(props.totalSize), ) for props in all_props[1:count_ref[]]] end function memory_access_properties(dev::ZeDevice) props_ref = Ref(ze_device_memory_access_properties_t()) zeDeviceGetMemoryAccessProperties(dev, props_ref) props = props_ref[] return ( hostAllocCapabilities=Int(props.hostAllocCapabilities), deviceAllocCapabilities=Int(props.deviceAllocCapabilities), sharedSingleDeviceAllocCapabilities=Int(props.sharedSingleDeviceAllocCapabilities), sharedCrossDeviceAllocCapabilities=Int(props.sharedCrossDeviceAllocCapabilities), sharedSystemAllocCapabilities=Int(props.sharedSystemAllocCapabilities), ) end function cache_properties(dev::ZeDevice) count_ref = Ref{UInt32}(0) zeDeviceGetCacheProperties(dev, count_ref, C_NULL) all_props = fill(ze_device_cache_properties_t(), count_ref[]) zeDeviceGetCacheProperties(dev, count_ref, all_props) return [(flags=props.flags, cacheSize=Int(props.cacheSize), ) for props in all_props[1:count_ref[]]] end function image_properties(dev::ZeDevice) props_ref = Ref(ze_device_image_properties_t()) zeDeviceGetImageProperties(dev, props_ref) props = props_ref[] return ( maxImageDims1D=Int(props.maxImageDims1D), maxImageDims2D=Int(props.maxImageDims2D), maxImageDims3D=Int(props.maxImageDims3D), maxImageBufferSize=Int(props.maxImageBufferSize), 
maxImageArraySlices=Int(props.maxImageArraySlices), maxSamplers=Int(props.maxSamplers), maxReadImageArgs=Int(props.maxReadImageArgs), maxWriteImageArgs=Int(props.maxWriteImageArgs), ) end function p2p_properties(dev1, dev2::ZeDevice) props_ref = Ref(ze_device_p2p_properties_t()) zeDeviceGetP2PProperties(dev1, dev2, props_ref) props = props_ref[] return ( flags=props.flags, ) end ## device iteration export devices struct ZeDevices handles::Vector{ze_device_handle_t} driver::ZeDriver function ZeDevices(drv::ZeDriver) count_ref = Ref{UInt32}(0) zeDeviceGet(drv, count_ref, C_NULL) handles = fill(ze_device_handle_t(), count_ref[]) zeDeviceGet(drv, count_ref, handles) new(handles, drv) end end devices(drv::ZeDriver) = ZeDevices(drv) Base.eltype(::ZeDevices) = ZeDevice function Base.iterate(iter::ZeDevices, i=1) i >= length(iter) + 1 ? nothing : (ZeDevice(iter.handles[i], iter.driver), i+1) end Base.length(iter::ZeDevices) = length(iter.handles) Base.IteratorSize(::ZeDevices) = Base.HasLength() Base.keys(iter::ZeDevices) = 1:length(iter) function Base.show(io::IO, ::MIME"text/plain", iter::ZeDevices) print(io, "ZeDevice iterator for $(length(iter)) devices") if !isempty(iter) print(io, ":") for (i,dev) in enumerate(iter) print(io, "\n$(i). 
$(properties(dev).name)") end end end Base.getindex(iter::ZeDevices, i::Integer) = ZeDevice(iter.handles[i], iter.driver) ================================================ FILE: lib/level-zero/driver.jl ================================================ export ZeDriver, api_version, properties, ipc_properties, extension_properties struct ZeDriver handle::ze_driver_handle_t # only accept handles, don't convert ZeDriver(handle::ze_driver_handle_t) = new(handle) end Base.unsafe_convert(::Type{ze_driver_handle_t}, drv::ZeDriver) = drv.handle Base.:(==)(a::ZeDriver, b::ZeDriver) = a.handle == b.handle Base.hash(e::ZeDriver, h::UInt) = hash(e.handle, h) function api_version(drv::ZeDriver) version_ref = Ref{ze_api_version_t}() zeDriverGetApiVersion(drv, version_ref) unmake_version(version_ref[]) end function Base.show(io::IO, drv::ZeDriver) props = properties(drv) print(io, "ZeDriver($(props.uuid))") end function Base.show(io::IO, ::MIME"text/plain", drv::ZeDriver) show(io, drv) props = properties(drv) print(io, ": version $(props.driverVersion)") end ## driver iteration export drivers struct ZeDrivers handles::Vector{ze_driver_handle_t} function ZeDrivers() count_ref = Ref{UInt32}(0) zeDriverGet(count_ref, C_NULL) handles = Vector{ze_driver_handle_t}(undef, count_ref[]) zeDriverGet(count_ref, handles) new(handles) end end drivers() = ZeDrivers() Base.eltype(::ZeDrivers) = ZeDriver function Base.iterate(iter::ZeDrivers, i=1) i >= length(iter) + 1 ? nothing : (ZeDriver(iter.handles[i]), i+1) end Base.length(iter::ZeDrivers) = length(iter.handles) Base.IteratorSize(::ZeDrivers) = Base.HasLength() function Base.show(io::IO, mime::MIME"text/plain", iter::ZeDrivers) print(io, "ZeDriver iterator for $(length(iter)) drivers") if !isempty(iter) print(io, ":") for (i,drv) in enumerate(iter) print(io, "\n$(i). 
") show(io, mime, drv) end end end Base.getindex(iter::ZeDrivers, i::Integer) = ZeDriver(iter.handles[i]) ## properties function properties(drv::ZeDriver) props_ref = Ref(ze_driver_properties_t()) zeDriverGetProperties(drv, props_ref) props = props_ref[] return ( uuid=Base.UUID(reinterpret(UInt128, [props.uuid.id...])[1]), driverVersion=VersionNumber((props.driverVersion & 0xFF000000) >> 24, (props.driverVersion & 0x00FF0000) >> 16, props.driverVersion & 0x0000FFFF), ) end function ipc_properties(drv::ZeDriver) props_ref = Ref(ze_driver_ipc_properties_t()) zeDriverGetIpcProperties(drv, props_ref) props = props_ref[] return ( flags=props.flags, ) end function extension_properties(drv::ZeDriver) count_ref = Ref{UInt32}(0) zeDriverGetExtensionProperties(drv, count_ref, C_NULL) all_props = Vector{ze_driver_extension_properties_t}(undef, count_ref[]) zeDriverGetExtensionProperties(drv, count_ref, all_props) extensions = Dict{String,VersionNumber}() for prop in all_props[1:count_ref[]] name = String(UInt8[prop.name[1:findfirst(isequal(0), prop.name)-1]...]) version = unmake_version(prop.version) extensions[name] = version end return extensions end ================================================ FILE: lib/level-zero/error.jl ================================================ # Error type and decoding functionality export ZeError struct ZeError <: Exception code::ze_result_t end Base.convert(::Type{ze_result_t}, err::ZeError) = err.code Base.showerror(io::IO, err::ZeError) = print(io, "ZeError: ", description(err), " (code $(reinterpret(Int32, err.code)), $(name(err)))") Base.show(io::IO, ::MIME"text/plain", err::ZeError) = print(io, "ZeError($(err.code))") name(err::ZeError) = string(err.code) ## COV_EXCL_START function description(err::ZeError) if err.code == RESULT_SUCCESS "success" elseif err.code == RESULT_NOT_READY "synchronization primitive not signaled" elseif err.code == RESULT_ERROR_DEVICE_LOST "device hung, reset, was removed, or driver update occurred" elseif 
err.code == RESULT_ERROR_OUT_OF_HOST_MEMORY "insufficient host memory to satisfy call" elseif err.code == RESULT_ERROR_OUT_OF_DEVICE_MEMORY "insufficient device memory to satisfy call" elseif err.code == RESULT_ERROR_MODULE_BUILD_FAILURE "error occurred when building module, see build log for details" elseif err.code == RESULT_ERROR_INSUFFICIENT_PERMISSIONS "access denied due to permission level" elseif err.code == RESULT_ERROR_NOT_AVAILABLE "resource already in use and simultaneous access not allowed" elseif err.code == RESULT_ERROR_UNINITIALIZED "driver is not initialized" elseif err.code == RESULT_ERROR_UNSUPPORTED_VERSION "generic error code for unsupported versions" elseif err.code == RESULT_ERROR_UNSUPPORTED_FEATURE "generic error code for unsupported features" elseif err.code == RESULT_ERROR_INVALID_ARGUMENT "generic error code for invalid arguments" elseif err.code == RESULT_ERROR_INVALID_NULL_HANDLE "handle argument is not valid" elseif err.code == RESULT_ERROR_HANDLE_OBJECT_IN_USE "object pointed to by handle still in-use by device" elseif err.code == RESULT_ERROR_INVALID_NULL_POINTER "pointer argument may not be nullptr" elseif err.code == RESULT_ERROR_INVALID_SIZE "size argument is invalid (e.g., must not be zero)" elseif err.code == RESULT_ERROR_UNSUPPORTED_SIZE "size argument is not supported by the device (e.g., too large)" elseif err.code == RESULT_ERROR_UNSUPPORTED_ALIGNMENT "alignment argument is not supported by the device (e.g., too small)" elseif err.code == RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT "synchronization object in invalid state" elseif err.code == RESULT_ERROR_INVALID_ENUMERATION "enumerator argument is not valid" elseif err.code == RESULT_ERROR_UNSUPPORTED_ENUMERATION "enumerator argument is not supported by the device" elseif err.code == RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT "image format is not supported by the device" elseif err.code == RESULT_ERROR_INVALID_NATIVE_BINARY "native binary is not supported by the device" elseif 
err.code == RESULT_ERROR_INVALID_GLOBAL_NAME "global variable is not found in the module" elseif err.code == RESULT_ERROR_INVALID_KERNEL_NAME "kernel name is not found in the module" elseif err.code == RESULT_ERROR_INVALID_FUNCTION_NAME "function name is not found in the module" elseif err.code == RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION "group size dimension is not valid for the kernel or device" elseif err.code == RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION "global width dimension is not valid for the kernel or device" elseif err.code == RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX "kernel argument index is not valid for kernel" elseif err.code == RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE "kernel argument size does not match kernel" elseif err.code == RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE "value of kernel attribute is not valid for the kernel or device" elseif err.code == RESULT_ERROR_INVALID_COMMAND_LIST_TYPE "command list type does not match command queue type" elseif err.code == RESULT_ERROR_OVERLAPPING_REGIONS "copy operations do not support overlapping regions of memory" elseif err.code == RESULT_ERROR_UNKNOWN "unknown or internal error" else "no description for this error" end end ## COV_EXCL_STOP @enum_without_prefix _ze_result_t ZE_ ================================================ FILE: lib/level-zero/event.jl ================================================ # pool export ZeEventPool mutable struct ZeEventPool handle::ze_event_pool_handle_t context::ZeContext function ZeEventPool(ctx::ZeContext, count::Integer, devs::ZeDevice...; flags=0) desc_ref = Ref(ze_event_pool_desc_t(; flags, count)) handle_ref = Ref{ze_event_pool_handle_t}() zeEventPoolCreate(ctx, desc_ref, length(devs), isempty(devs) ? 
C_NULL : [devs...], handle_ref) obj = new(handle_ref[], ctx) finalizer(obj) do obj zeEventPoolDestroy(obj) end obj end end Base.unsafe_convert(::Type{ze_event_pool_handle_t}, pool::ZeEventPool) = pool.handle Base.:(==)(a::ZeEventPool, b::ZeEventPool) = a.handle == b.handle Base.hash(e::ZeEventPool, h::UInt) = hash(e.handle, h) Base.getindex(pool::ZeEventPool, i::Integer) = ZeEvent(pool, i) # event export ZeEvent, append_wait!, signal, append_signal!, append_reset!, kernel_timestamp mutable struct ZeEvent handle::ze_event_handle_t pool::ZeEventPool function ZeEvent(pool, index::Integer) desc_ref = Ref(ze_event_desc_t(; index=index-1)) handle_ref = Ref{ze_event_handle_t}() zeEventCreate(pool, desc_ref, handle_ref) obj = new(handle_ref[], pool) finalizer(obj) do obj zeEventDestroy(obj) end obj end end Base.unsafe_convert(::Type{ze_event_handle_t}, event::ZeEvent) = event.handle Base.:(==)(a::ZeEvent, b::ZeEvent) = a.handle == b.handle Base.hash(e::ZeEvent, h::UInt) = hash(e.handle, h) signal(event::ZeEvent) = zeEventHostSignal(event) append_signal!(list::ZeCommandList, event::ZeEvent) = zeCommandListAppendSignalEvent(list, event) Base.wait(event::ZeEvent, timeout::Number=typemax(UInt64)) = zeEventHostSynchronize(event, timeout) append_wait!(list::ZeCommandList, events::ZeEvent...) 
= zeCommandListAppendWaitOnEvents(list, length(events), [events...]) Base.reset(event::ZeEvent) = zeEventHostReset(event) append_reset!(list::ZeCommandList, event::ZeEvent) = zeCommandListAppendEventReset(list, event) function Base.isdone(event::ZeEvent) res = unchecked_zeEventQueryStatus(event) if res == RESULT_NOT_READY return false elseif res == RESULT_SUCCESS return true else throw_api_error(res) end end function kernel_timestamp(event) timestamp_ref = Ref{ze_kernel_timestamp_result_t}() zeEventQueryKernelTimestamp(event, timestamp_ref) # TODO: convert using ze_device_properties_t.timerResolution # TODO: mask by ze_device_properties_t.kernelTimestampValidBits # https://spec.oneapi.com/level-zero/latest/core/PROG.html#kernel-timestamp-events # but how to get the device? timestamp = timestamp_ref[] return (; :global => ( start = timestamp._global.kernelStart == -1%UInt32 ? nothing : Int(timestamp._global.kernelStart), stop = timestamp._global.kernelEnd == -1%UInt32 ? nothing : Int(timestamp._global.kernelEnd) ), :context => ( start = timestamp.context.kernelStart == -1%UInt32 ? nothing : Int(timestamp.context.kernelStart), stop = timestamp.context.kernelEnd == -1%UInt32 ? 
                 nothing : Int(timestamp.context.kernelEnd)
        )
    )
end


================================================
FILE: lib/level-zero/fence.jl
================================================
# fence

export ZeFence

mutable struct ZeFence
    handle::ze_fence_handle_t
    queue::ZeCommandQueue

    function ZeFence(queue)
        desc_ref = Ref(ze_fence_desc_t())
        handle_ref = Ref{ze_fence_handle_t}()
        zeFenceCreate(queue, desc_ref, handle_ref)

        obj = new(handle_ref[], queue)
        # Destroy the underlying driver handle when the object is collected.
        finalizer(obj) do obj
            zeFenceDestroy(obj)
        end
        obj
    end
end

Base.unsafe_convert(::Type{ze_fence_handle_t}, fence::ZeFence) = fence.handle

# Equality and hashing follow the underlying driver handle.
Base.:(==)(a::ZeFence, b::ZeFence) = a.handle == b.handle
Base.hash(e::ZeFence, h::UInt) = hash(e.handle, h)

# Block until the fence is signaled, or until `timeout` expires
# (timeout units per the Level Zero spec -- presumably ns; confirm).
Base.wait(fence::ZeFence, timeout::Number=typemax(UInt64)) =
    zeFenceHostSynchronize(fence, timeout)

Base.reset(fence::ZeFence) = zeFenceReset(fence)

# Non-blocking status query: `false` while pending, `true` once signaled;
# any other result is rethrown as an API error.
function Base.isdone(fence::ZeFence)
    res = unchecked_zeFenceQueryStatus(fence)
    if res == RESULT_NOT_READY
        return false
    elseif res == RESULT_SUCCESS
        return true
    else
        throw_api_error(res)
    end
end


================================================
FILE: lib/level-zero/libze.jl
================================================
using CEnum: CEnum, @cenum

# outlined functionality to avoid GC frame allocation
@noinline function throw_api_error(res)
    # Map the two out-of-memory results onto the Julia-level OOM exception so
    # callers can trigger memory reclamation; everything else becomes ZeError.
    if res == RESULT_ERROR_OUT_OF_HOST_MEMORY || res == RESULT_ERROR_OUT_OF_DEVICE_MEMORY
        throw(OutOfGPUMemoryError())
    else
        throw(ZeError(res))
    end
end

# Run an API call `f`, retrying after reclaiming memory when it reports an
# out-of-memory condition, and throwing on any other failure.
function check(f)
    res = retry_reclaim(err -> err == RESULT_ERROR_OUT_OF_HOST_MEMORY ||
                               err == RESULT_ERROR_OUT_OF_DEVICE_MEMORY) do
        return f()
    end

    if res != RESULT_SUCCESS
        throw_api_error(res)
    end

    return
end

const ze_bool_t = UInt8

# Opaque driver object handles: empty mutable structs give each handle type a
# distinct pointer type without exposing any layout.
mutable struct _ze_driver_handle_t end

const ze_driver_handle_t = Ptr{_ze_driver_handle_t}

mutable struct _ze_device_handle_t end

const ze_device_handle_t = Ptr{_ze_device_handle_t}

mutable struct _ze_context_handle_t end

const ze_context_handle_t = Ptr{_ze_context_handle_t}

mutable struct _ze_command_queue_handle_t end

const
ze_command_queue_handle_t = Ptr{_ze_command_queue_handle_t} mutable struct _ze_command_list_handle_t end const ze_command_list_handle_t = Ptr{_ze_command_list_handle_t} mutable struct _ze_fence_handle_t end const ze_fence_handle_t = Ptr{_ze_fence_handle_t} mutable struct _ze_event_pool_handle_t end const ze_event_pool_handle_t = Ptr{_ze_event_pool_handle_t} mutable struct _ze_event_handle_t end const ze_event_handle_t = Ptr{_ze_event_handle_t} mutable struct _ze_image_handle_t end const ze_image_handle_t = Ptr{_ze_image_handle_t} mutable struct _ze_module_handle_t end const ze_module_handle_t = Ptr{_ze_module_handle_t} mutable struct _ze_module_build_log_handle_t end const ze_module_build_log_handle_t = Ptr{_ze_module_build_log_handle_t} mutable struct _ze_kernel_handle_t end const ze_kernel_handle_t = Ptr{_ze_kernel_handle_t} mutable struct _ze_sampler_handle_t end const ze_sampler_handle_t = Ptr{_ze_sampler_handle_t} mutable struct _ze_physical_mem_handle_t end const ze_physical_mem_handle_t = Ptr{_ze_physical_mem_handle_t} mutable struct _ze_fabric_vertex_handle_t end const ze_fabric_vertex_handle_t = Ptr{_ze_fabric_vertex_handle_t} mutable struct _ze_fabric_edge_handle_t end const ze_fabric_edge_handle_t = Ptr{_ze_fabric_edge_handle_t} struct _ze_ipc_mem_handle_t data::NTuple{64,Cchar} end const ze_ipc_mem_handle_t = _ze_ipc_mem_handle_t struct _ze_ipc_event_pool_handle_t data::NTuple{64,Cchar} end const ze_ipc_event_pool_handle_t = _ze_ipc_event_pool_handle_t @cenum _ze_result_t::UInt32 begin ZE_RESULT_SUCCESS = 0 ZE_RESULT_NOT_READY = 1 ZE_RESULT_ERROR_DEVICE_LOST = 1879048193 ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY = 1879048194 ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY = 1879048195 ZE_RESULT_ERROR_MODULE_BUILD_FAILURE = 1879048196 ZE_RESULT_ERROR_MODULE_LINK_FAILURE = 1879048197 ZE_RESULT_ERROR_DEVICE_REQUIRES_RESET = 1879048198 ZE_RESULT_ERROR_DEVICE_IN_LOW_POWER_STATE = 1879048199 ZE_RESULT_EXP_ERROR_DEVICE_IS_NOT_VERTEX = 2146435073 
ZE_RESULT_EXP_ERROR_VERTEX_IS_NOT_DEVICE = 2146435074 ZE_RESULT_EXP_ERROR_REMOTE_DEVICE = 2146435075 ZE_RESULT_EXP_ERROR_OPERANDS_INCOMPATIBLE = 2146435076 ZE_RESULT_EXP_RTAS_BUILD_RETRY = 2146435077 ZE_RESULT_EXP_RTAS_BUILD_DEFERRED = 2146435078 ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS = 1879113728 ZE_RESULT_ERROR_NOT_AVAILABLE = 1879113729 ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE = 1879179264 ZE_RESULT_WARNING_DROPPED_DATA = 1879179265 ZE_RESULT_ERROR_UNINITIALIZED = 2013265921 ZE_RESULT_ERROR_UNSUPPORTED_VERSION = 2013265922 ZE_RESULT_ERROR_UNSUPPORTED_FEATURE = 2013265923 ZE_RESULT_ERROR_INVALID_ARGUMENT = 2013265924 ZE_RESULT_ERROR_INVALID_NULL_HANDLE = 2013265925 ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE = 2013265926 ZE_RESULT_ERROR_INVALID_NULL_POINTER = 2013265927 ZE_RESULT_ERROR_INVALID_SIZE = 2013265928 ZE_RESULT_ERROR_UNSUPPORTED_SIZE = 2013265929 ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT = 2013265930 ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT = 2013265931 ZE_RESULT_ERROR_INVALID_ENUMERATION = 2013265932 ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION = 2013265933 ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT = 2013265934 ZE_RESULT_ERROR_INVALID_NATIVE_BINARY = 2013265935 ZE_RESULT_ERROR_INVALID_GLOBAL_NAME = 2013265936 ZE_RESULT_ERROR_INVALID_KERNEL_NAME = 2013265937 ZE_RESULT_ERROR_INVALID_FUNCTION_NAME = 2013265938 ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION = 2013265939 ZE_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION = 2013265940 ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX = 2013265941 ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE = 2013265942 ZE_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE = 2013265943 ZE_RESULT_ERROR_INVALID_MODULE_UNLINKED = 2013265944 ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE = 2013265945 ZE_RESULT_ERROR_OVERLAPPING_REGIONS = 2013265946 ZE_RESULT_WARNING_ACTION_REQUIRED = 2013265947 ZE_RESULT_ERROR_INVALID_KERNEL_HANDLE = 2013265948 ZE_RESULT_EXT_RTAS_BUILD_RETRY = 2013265949 ZE_RESULT_EXT_RTAS_BUILD_DEFERRED = 2013265950 
# Remaining members of the _ze_result_t status-code enumeration
# (the @cenum block is opened further up in this file).
ZE_RESULT_EXT_ERROR_OPERANDS_INCOMPATIBLE = 2013265951
ZE_RESULT_ERROR_SURVIVABILITY_MODE_DETECTED = 2013265952
ZE_RESULT_ERROR_UNKNOWN = 2147483646
ZE_RESULT_FORCE_UINT32 = 2147483647
end

const ze_result_t = _ze_result_t

# Tag identifying the concrete type of an extensible struct. Every
# descriptor/properties struct below stores one of these in its leading
# `stype` field, followed by a `pNext` extension pointer.
@cenum _ze_structure_type_t::UInt32 begin
    # core API structures
    ZE_STRUCTURE_TYPE_DRIVER_PROPERTIES = 1
    ZE_STRUCTURE_TYPE_DRIVER_IPC_PROPERTIES = 2
    ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES = 3
    ZE_STRUCTURE_TYPE_DEVICE_COMPUTE_PROPERTIES = 4
    ZE_STRUCTURE_TYPE_DEVICE_MODULE_PROPERTIES = 5
    ZE_STRUCTURE_TYPE_COMMAND_QUEUE_GROUP_PROPERTIES = 6
    ZE_STRUCTURE_TYPE_DEVICE_MEMORY_PROPERTIES = 7
    ZE_STRUCTURE_TYPE_DEVICE_MEMORY_ACCESS_PROPERTIES = 8
    ZE_STRUCTURE_TYPE_DEVICE_CACHE_PROPERTIES = 9
    ZE_STRUCTURE_TYPE_DEVICE_IMAGE_PROPERTIES = 10
    ZE_STRUCTURE_TYPE_DEVICE_P2P_PROPERTIES = 11
    ZE_STRUCTURE_TYPE_DEVICE_EXTERNAL_MEMORY_PROPERTIES = 12
    ZE_STRUCTURE_TYPE_CONTEXT_DESC = 13
    ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC = 14
    ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC = 15
    ZE_STRUCTURE_TYPE_EVENT_POOL_DESC = 16
    ZE_STRUCTURE_TYPE_EVENT_DESC = 17
    ZE_STRUCTURE_TYPE_FENCE_DESC = 18
    ZE_STRUCTURE_TYPE_IMAGE_DESC = 19
    ZE_STRUCTURE_TYPE_IMAGE_PROPERTIES = 20
    ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC = 21
    ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC = 22
    ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES = 23
    ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC = 24
    ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_FD = 25
    ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_FD = 26
    ZE_STRUCTURE_TYPE_MODULE_DESC = 27
    ZE_STRUCTURE_TYPE_MODULE_PROPERTIES = 28
    ZE_STRUCTURE_TYPE_KERNEL_DESC = 29
    ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES = 30
    ZE_STRUCTURE_TYPE_SAMPLER_DESC = 31
    ZE_STRUCTURE_TYPE_PHYSICAL_MEM_DESC = 32
    ZE_STRUCTURE_TYPE_KERNEL_PREFERRED_GROUP_SIZE_PROPERTIES = 33
    ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_WIN32 = 34
    ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_WIN32 = 35
    # extension (EXT) structures
    ZE_STRUCTURE_TYPE_DEVICE_RAYTRACING_EXT_PROPERTIES = 65537
    ZE_STRUCTURE_TYPE_RAYTRACING_MEM_ALLOC_EXT_DESC = 65538
    ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES = 65539
    ZE_STRUCTURE_TYPE_CACHE_RESERVATION_EXT_DESC = 65540
    ZE_STRUCTURE_TYPE_EU_COUNT_EXT = 65541
    ZE_STRUCTURE_TYPE_SRGB_EXT_DESC = 65542
    ZE_STRUCTURE_TYPE_LINKAGE_INSPECTION_EXT_DESC = 65543
    ZE_STRUCTURE_TYPE_PCI_EXT_PROPERTIES = 65544
    ZE_STRUCTURE_TYPE_DRIVER_MEMORY_FREE_EXT_PROPERTIES = 65545
    ZE_STRUCTURE_TYPE_MEMORY_FREE_EXT_DESC = 65546
    ZE_STRUCTURE_TYPE_MEMORY_COMPRESSION_HINTS_EXT_DESC = 65547
    ZE_STRUCTURE_TYPE_IMAGE_ALLOCATION_EXT_PROPERTIES = 65548
    ZE_STRUCTURE_TYPE_DEVICE_LUID_EXT_PROPERTIES = 65549
    ZE_STRUCTURE_TYPE_DEVICE_MEMORY_EXT_PROPERTIES = 65550
    ZE_STRUCTURE_TYPE_DEVICE_IP_VERSION_EXT = 65551
    ZE_STRUCTURE_TYPE_IMAGE_VIEW_PLANAR_EXT_DESC = 65552
    ZE_STRUCTURE_TYPE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_PROPERTIES = 65553
    ZE_STRUCTURE_TYPE_EVENT_QUERY_KERNEL_TIMESTAMPS_RESULTS_EXT_PROPERTIES = 65554
    ZE_STRUCTURE_TYPE_KERNEL_MAX_GROUP_SIZE_EXT_PROPERTIES = 65555
    # experimental (EXP) structures
    ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC = 131073
    ZE_STRUCTURE_TYPE_MODULE_PROGRAM_EXP_DESC = 131074
    ZE_STRUCTURE_TYPE_SCHEDULING_HINT_EXP_PROPERTIES = 131075
    ZE_STRUCTURE_TYPE_SCHEDULING_HINT_EXP_DESC = 131076
    ZE_STRUCTURE_TYPE_IMAGE_VIEW_PLANAR_EXP_DESC = 131077
    ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2 = 131078
    ZE_STRUCTURE_TYPE_IMAGE_MEMORY_EXP_PROPERTIES = 131079
    ZE_STRUCTURE_TYPE_POWER_SAVING_HINT_EXP_DESC = 131080
    ZE_STRUCTURE_TYPE_COPY_BANDWIDTH_EXP_PROPERTIES = 131081
    ZE_STRUCTURE_TYPE_DEVICE_P2P_BANDWIDTH_EXP_PROPERTIES = 131082
    ZE_STRUCTURE_TYPE_FABRIC_VERTEX_EXP_PROPERTIES = 131083
    ZE_STRUCTURE_TYPE_FABRIC_EDGE_EXP_PROPERTIES = 131084
    ZE_STRUCTURE_TYPE_MEMORY_SUB_ALLOCATIONS_EXP_PROPERTIES = 131085
    ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_DESC = 131086
    ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXP_DESC = 131087
    ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXP_PROPERTIES = 131088
    ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXP_PROPERTIES = 131089
    ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXP_PROPERTIES = 131090
    ZE_STRUCTURE_TYPE_RTAS_GEOMETRY_AABBS_EXP_CB_PARAMS = 131091
    ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC = 131092
    ZE_STRUCTURE_TYPE_MUTABLE_COMMAND_LIST_EXP_PROPERTIES = 131093
    ZE_STRUCTURE_TYPE_MUTABLE_COMMAND_LIST_EXP_DESC = 131094
    ZE_STRUCTURE_TYPE_MUTABLE_COMMAND_ID_EXP_DESC = 131095
    ZE_STRUCTURE_TYPE_MUTABLE_COMMANDS_EXP_DESC = 131096
    ZE_STRUCTURE_TYPE_MUTABLE_KERNEL_ARGUMENT_EXP_DESC = 131097
    ZE_STRUCTURE_TYPE_MUTABLE_GROUP_COUNT_EXP_DESC = 131098
    ZE_STRUCTURE_TYPE_MUTABLE_GROUP_SIZE_EXP_DESC = 131099
    ZE_STRUCTURE_TYPE_MUTABLE_GLOBAL_OFFSET_EXP_DESC = 131100
    ZE_STRUCTURE_TYPE_PITCHED_ALLOC_DEVICE_EXP_PROPERTIES = 131101
    ZE_STRUCTURE_TYPE_BINDLESS_IMAGE_EXP_DESC = 131102
    ZE_STRUCTURE_TYPE_PITCHED_IMAGE_EXP_DESC = 131103
    ZE_STRUCTURE_TYPE_MUTABLE_GRAPH_ARGUMENT_EXP_DESC = 131104
    ZE_STRUCTURE_TYPE_INIT_DRIVER_TYPE_DESC = 131105
    ZE_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_EXT_DESC = 131106
    ZE_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_WIN32_EXT_DESC = 131107
    ZE_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_FD_EXT_DESC = 131108
    ZE_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_EXT = 131109
    ZE_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_WAIT_PARAMS_EXT = 131110
    ZE_STRUCTURE_TYPE_DRIVER_DDI_HANDLES_EXT_PROPERTIES = 131111
    ZE_STRUCTURE_TYPE_DEVICE_CACHELINE_SIZE_EXT = 131112
    ZE_STRUCTURE_TYPE_DEVICE_VECTOR_WIDTH_PROPERTIES_EXT = 131113
    ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXT_DESC = 131120
    ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXT_DESC = 131121
    ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXT_PROPERTIES = 131122
    ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXT_PROPERTIES = 131123
    ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXT_PROPERTIES = 131124
    ZE_STRUCTURE_TYPE_RTAS_GEOMETRY_AABBS_EXT_CB_PARAMS = 131125
    ZE_STRUCTURE_TYPE_FORCE_UINT32 = 2147483647
end

const ze_structure_type_t = _ze_structure_type_t

# Bitfield of _ze_external_memory_type_flag_t values.
const ze_external_memory_type_flags_t = UInt32

@cenum _ze_external_memory_type_flag_t::UInt32 begin
    ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_FD = 1
    ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF = 2
    ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32 = 4
    ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32_KMT = 8
    ZE_EXTERNAL_MEMORY_TYPE_FLAG_D3D11_TEXTURE = 16
    ZE_EXTERNAL_MEMORY_TYPE_FLAG_D3D11_TEXTURE_KMT = 32
    ZE_EXTERNAL_MEMORY_TYPE_FLAG_D3D12_HEAP = 64
    ZE_EXTERNAL_MEMORY_TYPE_FLAG_D3D12_RESOURCE = 128
    ZE_EXTERNAL_MEMORY_TYPE_FLAG_FORCE_UINT32 = 2147483647
end

const ze_external_memory_type_flag_t = _ze_external_memory_type_flag_t

# Unit in which bandwidth figures are expressed.
@cenum _ze_bandwidth_unit_t::UInt32 begin
    ZE_BANDWIDTH_UNIT_UNKNOWN = 0
    ZE_BANDWIDTH_UNIT_BYTES_PER_NANOSEC = 1
    ZE_BANDWIDTH_UNIT_BYTES_PER_CLOCK = 2
    ZE_BANDWIDTH_UNIT_FORCE_UINT32 = 2147483647
end

const ze_bandwidth_unit_t = _ze_bandwidth_unit_t

# Unit in which latency figures are expressed.
@cenum _ze_latency_unit_t::UInt32 begin
    ZE_LATENCY_UNIT_UNKNOWN = 0
    ZE_LATENCY_UNIT_NANOSEC = 1
    ZE_LATENCY_UNIT_CLOCK = 2
    ZE_LATENCY_UNIT_HOP = 3
    ZE_LATENCY_UNIT_FORCE_UINT32 = 2147483647
end

const ze_latency_unit_t = _ze_latency_unit_t

# 128-bit universal unique identifier.
struct _ze_uuid_t
    id::NTuple{16,UInt8}
end

const ze_uuid_t = _ze_uuid_t

# Common `stype`/`pNext` headers shared by the extensible structs below.
struct _ze_base_cb_params_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
end

const ze_base_cb_params_t = _ze_base_cb_params_t

struct _ze_base_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
end

const ze_base_properties_t = _ze_base_properties_t

struct _ze_base_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
end

const ze_base_desc_t = _ze_base_desc_t

const ze_init_driver_type_flags_t = UInt32

struct _ze_init_driver_type_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    flags::ze_init_driver_type_flags_t
end

const ze_init_driver_type_desc_t = _ze_init_driver_type_desc_t

# Driver identification and capability structs.
struct _ze_driver_uuid_t
    id::NTuple{16,UInt8}
end

const ze_driver_uuid_t = _ze_driver_uuid_t

struct _ze_driver_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    uuid::ze_driver_uuid_t
    driverVersion::UInt32
end

const ze_driver_properties_t = _ze_driver_properties_t

const ze_ipc_property_flags_t = UInt32

struct _ze_driver_ipc_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    flags::ze_ipc_property_flags_t
end

const ze_driver_ipc_properties_t = _ze_driver_ipc_properties_t

struct _ze_driver_extension_properties_t
    name::NTuple{256,Cchar}
    version::UInt32
end

const ze_driver_extension_properties_t = _ze_driver_extension_properties_t

# Device identification and capability structs.
struct _ze_device_uuid_t
    id::NTuple{16,UInt8}
end

const ze_device_uuid_t = _ze_device_uuid_t

@cenum _ze_device_type_t::UInt32 begin
    ZE_DEVICE_TYPE_GPU = 1
    ZE_DEVICE_TYPE_CPU = 2
    ZE_DEVICE_TYPE_FPGA = 3
    ZE_DEVICE_TYPE_MCA = 4
    ZE_DEVICE_TYPE_VPU = 5
    ZE_DEVICE_TYPE_FORCE_UINT32 = 2147483647
end

const ze_device_type_t = _ze_device_type_t

const ze_device_property_flags_t = UInt32

struct _ze_device_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    type::ze_device_type_t
    vendorId::UInt32
    deviceId::UInt32
    flags::ze_device_property_flags_t
    subdeviceId::UInt32
    coreClockRate::UInt32
    maxMemAllocSize::UInt64
    maxHardwareContexts::UInt32
    maxCommandQueuePriority::UInt32
    numThreadsPerEU::UInt32
    physicalEUSimdWidth::UInt32
    numEUsPerSubslice::UInt32
    numSubslicesPerSlice::UInt32
    numSlices::UInt32
    timerResolution::UInt64
    timestampValidBits::UInt32
    kernelTimestampValidBits::UInt32
    uuid::ze_device_uuid_t
    name::NTuple{256,Cchar}
end

const ze_device_properties_t = _ze_device_properties_t

struct _ze_device_thread_t
    slice::UInt32
    subslice::UInt32
    eu::UInt32
    thread::UInt32
end

const ze_device_thread_t = _ze_device_thread_t

struct _ze_device_compute_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    maxTotalGroupSize::UInt32
    maxGroupSizeX::UInt32
    maxGroupSizeY::UInt32
    maxGroupSizeZ::UInt32
    maxGroupCountX::UInt32
    maxGroupCountY::UInt32
    maxGroupCountZ::UInt32
    maxSharedLocalMemory::UInt32
    numSubGroupSizes::UInt32
    subGroupSizes::NTuple{8,UInt32}
end

const ze_device_compute_properties_t = _ze_device_compute_properties_t

struct _ze_native_kernel_uuid_t
    id::NTuple{16,UInt8}
end

const ze_native_kernel_uuid_t = _ze_native_kernel_uuid_t

const ze_device_module_flags_t = UInt32

const ze_device_fp_flags_t = UInt32

struct _ze_device_module_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    spirvVersionSupported::UInt32
flags::ze_device_module_flags_t fp16flags::ze_device_fp_flags_t fp32flags::ze_device_fp_flags_t fp64flags::ze_device_fp_flags_t maxArgumentsSize::UInt32 printfBufferSize::UInt32 nativeKernelSupported::ze_native_kernel_uuid_t end const ze_device_module_properties_t = _ze_device_module_properties_t const ze_command_queue_group_property_flags_t = UInt32 struct _ze_command_queue_group_properties_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_command_queue_group_property_flags_t maxMemoryFillPatternSize::Csize_t numQueues::UInt32 end const ze_command_queue_group_properties_t = _ze_command_queue_group_properties_t const ze_device_memory_property_flags_t = UInt32 struct _ze_device_memory_properties_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_device_memory_property_flags_t maxClockRate::UInt32 maxBusWidth::UInt32 totalSize::UInt64 name::NTuple{256,Cchar} end const ze_device_memory_properties_t = _ze_device_memory_properties_t const ze_memory_access_cap_flags_t = UInt32 struct _ze_device_memory_access_properties_t stype::ze_structure_type_t pNext::Ptr{Cvoid} hostAllocCapabilities::ze_memory_access_cap_flags_t deviceAllocCapabilities::ze_memory_access_cap_flags_t sharedSingleDeviceAllocCapabilities::ze_memory_access_cap_flags_t sharedCrossDeviceAllocCapabilities::ze_memory_access_cap_flags_t sharedSystemAllocCapabilities::ze_memory_access_cap_flags_t end const ze_device_memory_access_properties_t = _ze_device_memory_access_properties_t const ze_device_cache_property_flags_t = UInt32 struct _ze_device_cache_properties_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_device_cache_property_flags_t cacheSize::Csize_t end const ze_device_cache_properties_t = _ze_device_cache_properties_t struct _ze_device_image_properties_t stype::ze_structure_type_t pNext::Ptr{Cvoid} maxImageDims1D::UInt32 maxImageDims2D::UInt32 maxImageDims3D::UInt32 maxImageBufferSize::UInt64 maxImageArraySlices::UInt32 maxSamplers::UInt32 maxReadImageArgs::UInt32 
maxWriteImageArgs::UInt32 end const ze_device_image_properties_t = _ze_device_image_properties_t struct _ze_device_external_memory_properties_t stype::ze_structure_type_t pNext::Ptr{Cvoid} memoryAllocationImportTypes::ze_external_memory_type_flags_t memoryAllocationExportTypes::ze_external_memory_type_flags_t imageImportTypes::ze_external_memory_type_flags_t imageExportTypes::ze_external_memory_type_flags_t end const ze_device_external_memory_properties_t = _ze_device_external_memory_properties_t const ze_device_p2p_property_flags_t = UInt32 struct _ze_device_p2p_properties_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_device_p2p_property_flags_t end const ze_device_p2p_properties_t = _ze_device_p2p_properties_t const ze_context_flags_t = UInt32 struct _ze_context_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_context_flags_t end const ze_context_desc_t = _ze_context_desc_t const ze_command_queue_flags_t = UInt32 @cenum _ze_command_queue_mode_t::UInt32 begin ZE_COMMAND_QUEUE_MODE_DEFAULT = 0 ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS = 1 ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS = 2 ZE_COMMAND_QUEUE_MODE_FORCE_UINT32 = 2147483647 end const ze_command_queue_mode_t = _ze_command_queue_mode_t @cenum _ze_command_queue_priority_t::UInt32 begin ZE_COMMAND_QUEUE_PRIORITY_NORMAL = 0 ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW = 1 ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_HIGH = 2 ZE_COMMAND_QUEUE_PRIORITY_FORCE_UINT32 = 2147483647 end const ze_command_queue_priority_t = _ze_command_queue_priority_t struct _ze_command_queue_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} ordinal::UInt32 index::UInt32 flags::ze_command_queue_flags_t mode::ze_command_queue_mode_t priority::ze_command_queue_priority_t end const ze_command_queue_desc_t = _ze_command_queue_desc_t const ze_command_list_flags_t = UInt32 struct _ze_command_list_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} commandQueueGroupOrdinal::UInt32 flags::ze_command_list_flags_t end const ze_command_list_desc_t = 
_ze_command_list_desc_t struct _ze_copy_region_t originX::UInt32 originY::UInt32 originZ::UInt32 width::UInt32 height::UInt32 depth::UInt32 end const ze_copy_region_t = _ze_copy_region_t struct _ze_image_region_t originX::UInt32 originY::UInt32 originZ::UInt32 width::UInt32 height::UInt32 depth::UInt32 end const ze_image_region_t = _ze_image_region_t const ze_event_pool_flags_t = UInt32 struct _ze_event_pool_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_event_pool_flags_t count::UInt32 end const ze_event_pool_desc_t = _ze_event_pool_desc_t const ze_event_scope_flags_t = UInt32 struct _ze_event_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} index::UInt32 signal::ze_event_scope_flags_t wait::ze_event_scope_flags_t end const ze_event_desc_t = _ze_event_desc_t struct _ze_kernel_timestamp_data_t kernelStart::UInt64 kernelEnd::UInt64 end const ze_kernel_timestamp_data_t = _ze_kernel_timestamp_data_t struct _ze_kernel_timestamp_result_t _global::ze_kernel_timestamp_data_t context::ze_kernel_timestamp_data_t end const ze_kernel_timestamp_result_t = _ze_kernel_timestamp_result_t const ze_fence_flags_t = UInt32 struct _ze_fence_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_fence_flags_t end const ze_fence_desc_t = _ze_fence_desc_t @cenum _ze_image_format_layout_t::UInt32 begin ZE_IMAGE_FORMAT_LAYOUT_8 = 0 ZE_IMAGE_FORMAT_LAYOUT_16 = 1 ZE_IMAGE_FORMAT_LAYOUT_32 = 2 ZE_IMAGE_FORMAT_LAYOUT_8_8 = 3 ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8 = 4 ZE_IMAGE_FORMAT_LAYOUT_16_16 = 5 ZE_IMAGE_FORMAT_LAYOUT_16_16_16_16 = 6 ZE_IMAGE_FORMAT_LAYOUT_32_32 = 7 ZE_IMAGE_FORMAT_LAYOUT_32_32_32_32 = 8 ZE_IMAGE_FORMAT_LAYOUT_10_10_10_2 = 9 ZE_IMAGE_FORMAT_LAYOUT_11_11_10 = 10 ZE_IMAGE_FORMAT_LAYOUT_5_6_5 = 11 ZE_IMAGE_FORMAT_LAYOUT_5_5_5_1 = 12 ZE_IMAGE_FORMAT_LAYOUT_4_4_4_4 = 13 ZE_IMAGE_FORMAT_LAYOUT_Y8 = 14 ZE_IMAGE_FORMAT_LAYOUT_NV12 = 15 ZE_IMAGE_FORMAT_LAYOUT_YUYV = 16 ZE_IMAGE_FORMAT_LAYOUT_VYUY = 17 ZE_IMAGE_FORMAT_LAYOUT_YVYU = 18 ZE_IMAGE_FORMAT_LAYOUT_UYVY = 
19 ZE_IMAGE_FORMAT_LAYOUT_AYUV = 20 ZE_IMAGE_FORMAT_LAYOUT_P010 = 21 ZE_IMAGE_FORMAT_LAYOUT_Y410 = 22 ZE_IMAGE_FORMAT_LAYOUT_P012 = 23 ZE_IMAGE_FORMAT_LAYOUT_Y16 = 24 ZE_IMAGE_FORMAT_LAYOUT_P016 = 25 ZE_IMAGE_FORMAT_LAYOUT_Y216 = 26 ZE_IMAGE_FORMAT_LAYOUT_P216 = 27 ZE_IMAGE_FORMAT_LAYOUT_P8 = 28 ZE_IMAGE_FORMAT_LAYOUT_YUY2 = 29 ZE_IMAGE_FORMAT_LAYOUT_A8P8 = 30 ZE_IMAGE_FORMAT_LAYOUT_IA44 = 31 ZE_IMAGE_FORMAT_LAYOUT_AI44 = 32 ZE_IMAGE_FORMAT_LAYOUT_Y416 = 33 ZE_IMAGE_FORMAT_LAYOUT_Y210 = 34 ZE_IMAGE_FORMAT_LAYOUT_I420 = 35 ZE_IMAGE_FORMAT_LAYOUT_YV12 = 36 ZE_IMAGE_FORMAT_LAYOUT_400P = 37 ZE_IMAGE_FORMAT_LAYOUT_422H = 38 ZE_IMAGE_FORMAT_LAYOUT_422V = 39 ZE_IMAGE_FORMAT_LAYOUT_444P = 40 ZE_IMAGE_FORMAT_LAYOUT_RGBP = 41 ZE_IMAGE_FORMAT_LAYOUT_BRGP = 42 ZE_IMAGE_FORMAT_LAYOUT_8_8_8 = 43 ZE_IMAGE_FORMAT_LAYOUT_16_16_16 = 44 ZE_IMAGE_FORMAT_LAYOUT_32_32_32 = 45 ZE_IMAGE_FORMAT_LAYOUT_FORCE_UINT32 = 2147483647 end const ze_image_format_layout_t = _ze_image_format_layout_t @cenum _ze_image_format_type_t::UInt32 begin ZE_IMAGE_FORMAT_TYPE_UINT = 0 ZE_IMAGE_FORMAT_TYPE_SINT = 1 ZE_IMAGE_FORMAT_TYPE_UNORM = 2 ZE_IMAGE_FORMAT_TYPE_SNORM = 3 ZE_IMAGE_FORMAT_TYPE_FLOAT = 4 ZE_IMAGE_FORMAT_TYPE_FORCE_UINT32 = 2147483647 end const ze_image_format_type_t = _ze_image_format_type_t @cenum _ze_image_format_swizzle_t::UInt32 begin ZE_IMAGE_FORMAT_SWIZZLE_R = 0 ZE_IMAGE_FORMAT_SWIZZLE_G = 1 ZE_IMAGE_FORMAT_SWIZZLE_B = 2 ZE_IMAGE_FORMAT_SWIZZLE_A = 3 ZE_IMAGE_FORMAT_SWIZZLE_0 = 4 ZE_IMAGE_FORMAT_SWIZZLE_1 = 5 ZE_IMAGE_FORMAT_SWIZZLE_X = 6 ZE_IMAGE_FORMAT_SWIZZLE_FORCE_UINT32 = 2147483647 end const ze_image_format_swizzle_t = _ze_image_format_swizzle_t struct _ze_image_format_t layout::ze_image_format_layout_t type::ze_image_format_type_t x::ze_image_format_swizzle_t y::ze_image_format_swizzle_t z::ze_image_format_swizzle_t w::ze_image_format_swizzle_t end const ze_image_format_t = _ze_image_format_t const ze_image_flags_t = UInt32 @cenum _ze_image_type_t::UInt32 begin ZE_IMAGE_TYPE_1D = 0 
ZE_IMAGE_TYPE_1DARRAY = 1 ZE_IMAGE_TYPE_2D = 2 ZE_IMAGE_TYPE_2DARRAY = 3 ZE_IMAGE_TYPE_3D = 4 ZE_IMAGE_TYPE_BUFFER = 5 ZE_IMAGE_TYPE_FORCE_UINT32 = 2147483647 end const ze_image_type_t = _ze_image_type_t struct _ze_image_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_image_flags_t type::ze_image_type_t format::ze_image_format_t width::UInt64 height::UInt32 depth::UInt32 arraylevels::UInt32 miplevels::UInt32 end const ze_image_desc_t = _ze_image_desc_t const ze_image_sampler_filter_flags_t = UInt32 struct _ze_image_properties_t stype::ze_structure_type_t pNext::Ptr{Cvoid} samplerFilterFlags::ze_image_sampler_filter_flags_t end const ze_image_properties_t = _ze_image_properties_t const ze_device_mem_alloc_flags_t = UInt32 struct _ze_device_mem_alloc_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_device_mem_alloc_flags_t ordinal::UInt32 end const ze_device_mem_alloc_desc_t = _ze_device_mem_alloc_desc_t const ze_host_mem_alloc_flags_t = UInt32 struct _ze_host_mem_alloc_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_host_mem_alloc_flags_t end const ze_host_mem_alloc_desc_t = _ze_host_mem_alloc_desc_t @cenum _ze_memory_type_t::UInt32 begin ZE_MEMORY_TYPE_UNKNOWN = 0 ZE_MEMORY_TYPE_HOST = 1 ZE_MEMORY_TYPE_DEVICE = 2 ZE_MEMORY_TYPE_SHARED = 3 ZE_MEMORY_TYPE_FORCE_UINT32 = 2147483647 end const ze_memory_type_t = _ze_memory_type_t struct _ze_memory_allocation_properties_t stype::ze_structure_type_t pNext::Ptr{Cvoid} type::ze_memory_type_t id::UInt64 pageSize::UInt64 end const ze_memory_allocation_properties_t = _ze_memory_allocation_properties_t struct _ze_external_memory_export_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_external_memory_type_flags_t end const ze_external_memory_export_desc_t = _ze_external_memory_export_desc_t struct _ze_external_memory_import_fd_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_external_memory_type_flags_t fd::Cint end const ze_external_memory_import_fd_t = 
_ze_external_memory_import_fd_t struct _ze_external_memory_export_fd_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_external_memory_type_flags_t fd::Cint end const ze_external_memory_export_fd_t = _ze_external_memory_export_fd_t struct _ze_external_memory_import_win32_handle_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_external_memory_type_flags_t handle::Ptr{Cvoid} name::Ptr{Cvoid} end const ze_external_memory_import_win32_handle_t = _ze_external_memory_import_win32_handle_t struct _ze_external_memory_export_win32_handle_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_external_memory_type_flags_t handle::Ptr{Cvoid} end const ze_external_memory_export_win32_handle_t = _ze_external_memory_export_win32_handle_t struct _ze_module_constants_t numConstants::UInt32 pConstantIds::Ptr{UInt32} pConstantValues::Ptr{Ptr{Cvoid}} end const ze_module_constants_t = _ze_module_constants_t @cenum _ze_module_format_t::UInt32 begin ZE_MODULE_FORMAT_IL_SPIRV = 0 ZE_MODULE_FORMAT_NATIVE = 1 ZE_MODULE_FORMAT_FORCE_UINT32 = 2147483647 end const ze_module_format_t = _ze_module_format_t struct _ze_module_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} format::ze_module_format_t inputSize::Csize_t pInputModule::Ptr{UInt8} pBuildFlags::Ptr{Cchar} pConstants::Ptr{ze_module_constants_t} end const ze_module_desc_t = _ze_module_desc_t const ze_module_property_flags_t = UInt32 struct _ze_module_properties_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_module_property_flags_t end const ze_module_properties_t = _ze_module_properties_t const ze_kernel_flags_t = UInt32 struct _ze_kernel_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_kernel_flags_t pKernelName::Ptr{Cchar} end const ze_kernel_desc_t = _ze_kernel_desc_t struct _ze_kernel_uuid_t kid::NTuple{16,UInt8} mid::NTuple{16,UInt8} end const ze_kernel_uuid_t = _ze_kernel_uuid_t struct _ze_kernel_properties_t stype::ze_structure_type_t pNext::Ptr{Cvoid} numKernelArgs::UInt32 
requiredGroupSizeX::UInt32 requiredGroupSizeY::UInt32 requiredGroupSizeZ::UInt32 requiredNumSubGroups::UInt32 requiredSubgroupSize::UInt32 maxSubgroupSize::UInt32 maxNumSubgroups::UInt32 localMemSize::UInt32 privateMemSize::UInt32 spillMemSize::UInt32 uuid::ze_kernel_uuid_t end const ze_kernel_properties_t = _ze_kernel_properties_t struct _ze_kernel_preferred_group_size_properties_t stype::ze_structure_type_t pNext::Ptr{Cvoid} preferredMultiple::UInt32 end const ze_kernel_preferred_group_size_properties_t = _ze_kernel_preferred_group_size_properties_t struct _ze_group_count_t groupCountX::UInt32 groupCountY::UInt32 groupCountZ::UInt32 end const ze_group_count_t = _ze_group_count_t struct _ze_module_program_exp_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} count::UInt32 inputSizes::Ptr{Csize_t} pInputModules::Ptr{Ptr{UInt8}} pBuildFlags::Ptr{Ptr{Cchar}} pConstants::Ptr{Ptr{ze_module_constants_t}} end const ze_module_program_exp_desc_t = _ze_module_program_exp_desc_t const ze_device_raytracing_ext_flags_t = UInt32 struct _ze_device_raytracing_ext_properties_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_device_raytracing_ext_flags_t maxBVHLevels::UInt32 end const ze_device_raytracing_ext_properties_t = _ze_device_raytracing_ext_properties_t const ze_raytracing_mem_alloc_ext_flags_t = UInt32 struct _ze_raytracing_mem_alloc_ext_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_raytracing_mem_alloc_ext_flags_t end const ze_raytracing_mem_alloc_ext_desc_t = _ze_raytracing_mem_alloc_ext_desc_t @cenum _ze_sampler_address_mode_t::UInt32 begin ZE_SAMPLER_ADDRESS_MODE_NONE = 0 ZE_SAMPLER_ADDRESS_MODE_REPEAT = 1 ZE_SAMPLER_ADDRESS_MODE_CLAMP = 2 ZE_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER = 3 ZE_SAMPLER_ADDRESS_MODE_MIRROR = 4 ZE_SAMPLER_ADDRESS_MODE_FORCE_UINT32 = 2147483647 end const ze_sampler_address_mode_t = _ze_sampler_address_mode_t @cenum _ze_sampler_filter_mode_t::UInt32 begin ZE_SAMPLER_FILTER_MODE_NEAREST = 0 ZE_SAMPLER_FILTER_MODE_LINEAR = 1 
ZE_SAMPLER_FILTER_MODE_FORCE_UINT32 = 2147483647 end const ze_sampler_filter_mode_t = _ze_sampler_filter_mode_t struct _ze_sampler_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} addressMode::ze_sampler_address_mode_t filterMode::ze_sampler_filter_mode_t isNormalized::ze_bool_t end const ze_sampler_desc_t = _ze_sampler_desc_t const ze_physical_mem_flags_t = UInt32 struct _ze_physical_mem_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_physical_mem_flags_t size::Csize_t end const ze_physical_mem_desc_t = _ze_physical_mem_desc_t const ze_device_fp_atomic_ext_flags_t = UInt32 struct _ze_float_atomic_ext_properties_t stype::ze_structure_type_t pNext::Ptr{Cvoid} fp16Flags::ze_device_fp_atomic_ext_flags_t fp32Flags::ze_device_fp_atomic_ext_flags_t fp64Flags::ze_device_fp_atomic_ext_flags_t end const ze_float_atomic_ext_properties_t = _ze_float_atomic_ext_properties_t const ze_relaxed_allocation_limits_exp_flags_t = UInt32 struct _ze_relaxed_allocation_limits_exp_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_relaxed_allocation_limits_exp_flags_t end const ze_relaxed_allocation_limits_exp_desc_t = _ze_relaxed_allocation_limits_exp_desc_t const ze_driver_ddi_handle_ext_flags_t = UInt32 struct _ze_driver_ddi_handles_ext_properties_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_driver_ddi_handle_ext_flags_t end const ze_driver_ddi_handles_ext_properties_t = _ze_driver_ddi_handles_ext_properties_t const ze_external_semaphore_ext_flags_t = UInt32 struct _ze_external_semaphore_ext_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_external_semaphore_ext_flags_t end const ze_external_semaphore_ext_desc_t = _ze_external_semaphore_ext_desc_t struct _ze_external_semaphore_win32_ext_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} handle::Ptr{Cvoid} name::Ptr{Cchar} end const ze_external_semaphore_win32_ext_desc_t = _ze_external_semaphore_win32_ext_desc_t struct _ze_external_semaphore_fd_ext_desc_t stype::ze_structure_type_t 
pNext::Ptr{Cvoid} fd::Cint end const ze_external_semaphore_fd_ext_desc_t = _ze_external_semaphore_fd_ext_desc_t struct _ze_external_semaphore_signal_params_ext_t stype::ze_structure_type_t pNext::Ptr{Cvoid} value::UInt64 end const ze_external_semaphore_signal_params_ext_t = _ze_external_semaphore_signal_params_ext_t struct _ze_external_semaphore_wait_params_ext_t stype::ze_structure_type_t pNext::Ptr{Cvoid} value::UInt64 end const ze_external_semaphore_wait_params_ext_t = _ze_external_semaphore_wait_params_ext_t struct _ze_device_cache_line_size_ext_t stype::ze_structure_type_t pNext::Ptr{Cvoid} cacheLineSize::Csize_t end const ze_device_cache_line_size_ext_t = _ze_device_cache_line_size_ext_t @cenum _ze_rtas_builder_ext_version_t::UInt32 begin ZE_RTAS_BUILDER_EXT_VERSION_1_0 = 65536 ZE_RTAS_BUILDER_EXT_VERSION_CURRENT = 65536 ZE_RTAS_BUILDER_EXT_VERSION_FORCE_UINT32 = 2147483647 end const ze_rtas_builder_ext_version_t = _ze_rtas_builder_ext_version_t struct _ze_rtas_builder_ext_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} builderVersion::ze_rtas_builder_ext_version_t end const ze_rtas_builder_ext_desc_t = _ze_rtas_builder_ext_desc_t const ze_rtas_builder_ext_flags_t = UInt32 struct _ze_rtas_builder_ext_properties_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_rtas_builder_ext_flags_t rtasBufferSizeBytesExpected::Csize_t rtasBufferSizeBytesMaxRequired::Csize_t scratchBufferSizeBytes::Csize_t end const ze_rtas_builder_ext_properties_t = _ze_rtas_builder_ext_properties_t const ze_rtas_parallel_operation_ext_flags_t = UInt32 struct _ze_rtas_parallel_operation_ext_properties_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_rtas_parallel_operation_ext_flags_t maxConcurrency::UInt32 end const ze_rtas_parallel_operation_ext_properties_t = _ze_rtas_parallel_operation_ext_properties_t const ze_rtas_device_ext_flags_t = UInt32 @cenum _ze_rtas_format_ext_t::UInt32 begin ZE_RTAS_FORMAT_EXT_INVALID = 0 ZE_RTAS_FORMAT_EXT_MAX = 2147483646 
ZE_RTAS_FORMAT_EXT_FORCE_UINT32 = 2147483647 end const ze_rtas_format_ext_t = _ze_rtas_format_ext_t struct _ze_rtas_device_ext_properties_t stype::ze_structure_type_t pNext::Ptr{Cvoid} flags::ze_rtas_device_ext_flags_t rtasFormat::ze_rtas_format_ext_t rtasBufferAlignment::UInt32 end const ze_rtas_device_ext_properties_t = _ze_rtas_device_ext_properties_t struct _ze_rtas_float3_ext_t x::Cfloat y::Cfloat z::Cfloat end const ze_rtas_float3_ext_t = _ze_rtas_float3_ext_t struct _ze_rtas_transform_float3x4_column_major_ext_t vx_x::Cfloat vx_y::Cfloat vx_z::Cfloat vy_x::Cfloat vy_y::Cfloat vy_z::Cfloat vz_x::Cfloat vz_y::Cfloat vz_z::Cfloat p_x::Cfloat p_y::Cfloat p_z::Cfloat end const ze_rtas_transform_float3x4_column_major_ext_t = _ze_rtas_transform_float3x4_column_major_ext_t struct _ze_rtas_transform_float3x4_aligned_column_major_ext_t vx_x::Cfloat vx_y::Cfloat vx_z::Cfloat pad0::Cfloat vy_x::Cfloat vy_y::Cfloat vy_z::Cfloat pad1::Cfloat vz_x::Cfloat vz_y::Cfloat vz_z::Cfloat pad2::Cfloat p_x::Cfloat p_y::Cfloat p_z::Cfloat pad3::Cfloat end const ze_rtas_transform_float3x4_aligned_column_major_ext_t = _ze_rtas_transform_float3x4_aligned_column_major_ext_t struct _ze_rtas_transform_float3x4_row_major_ext_t vx_x::Cfloat vy_x::Cfloat vz_x::Cfloat p_x::Cfloat vx_y::Cfloat vy_y::Cfloat vz_y::Cfloat p_y::Cfloat vx_z::Cfloat vy_z::Cfloat vz_z::Cfloat p_z::Cfloat end const ze_rtas_transform_float3x4_row_major_ext_t = _ze_rtas_transform_float3x4_row_major_ext_t struct _ze_rtas_aabb_ext_t lower::ze_rtas_float3_ext_t upper::ze_rtas_float3_ext_t end const ze_rtas_aabb_ext_t = _ze_rtas_aabb_ext_t struct _ze_rtas_triangle_indices_uint32_ext_t v0::UInt32 v1::UInt32 v2::UInt32 end const ze_rtas_triangle_indices_uint32_ext_t = _ze_rtas_triangle_indices_uint32_ext_t struct _ze_rtas_quad_indices_uint32_ext_t v0::UInt32 v1::UInt32 v2::UInt32 v3::UInt32 end const ze_rtas_quad_indices_uint32_ext_t = _ze_rtas_quad_indices_uint32_ext_t const ze_rtas_builder_packed_geometry_type_ext_t = UInt8 
struct _ze_rtas_builder_geometry_info_ext_t geometryType::ze_rtas_builder_packed_geometry_type_ext_t end const ze_rtas_builder_geometry_info_ext_t = _ze_rtas_builder_geometry_info_ext_t const ze_rtas_builder_packed_geometry_ext_flags_t = UInt8 const ze_rtas_builder_packed_input_data_format_ext_t = UInt8 struct _ze_rtas_builder_triangles_geometry_info_ext_t geometryType::ze_rtas_builder_packed_geometry_type_ext_t geometryFlags::ze_rtas_builder_packed_geometry_ext_flags_t geometryMask::UInt8 triangleFormat::ze_rtas_builder_packed_input_data_format_ext_t vertexFormat::ze_rtas_builder_packed_input_data_format_ext_t triangleCount::UInt32 vertexCount::UInt32 triangleStride::UInt32 vertexStride::UInt32 pTriangleBuffer::Ptr{Cvoid} pVertexBuffer::Ptr{Cvoid} end const ze_rtas_builder_triangles_geometry_info_ext_t = _ze_rtas_builder_triangles_geometry_info_ext_t struct _ze_rtas_builder_quads_geometry_info_ext_t geometryType::ze_rtas_builder_packed_geometry_type_ext_t geometryFlags::ze_rtas_builder_packed_geometry_ext_flags_t geometryMask::UInt8 quadFormat::ze_rtas_builder_packed_input_data_format_ext_t vertexFormat::ze_rtas_builder_packed_input_data_format_ext_t quadCount::UInt32 vertexCount::UInt32 quadStride::UInt32 vertexStride::UInt32 pQuadBuffer::Ptr{Cvoid} pVertexBuffer::Ptr{Cvoid} end const ze_rtas_builder_quads_geometry_info_ext_t = _ze_rtas_builder_quads_geometry_info_ext_t struct _ze_rtas_geometry_aabbs_ext_cb_params_t stype::ze_structure_type_t pNext::Ptr{Cvoid} primID::UInt32 primIDCount::UInt32 pGeomUserPtr::Ptr{Cvoid} pBuildUserPtr::Ptr{Cvoid} pBoundsOut::Ptr{ze_rtas_aabb_ext_t} end const ze_rtas_geometry_aabbs_ext_cb_params_t = _ze_rtas_geometry_aabbs_ext_cb_params_t # typedef void ( * ze_rtas_geometry_aabbs_cb_ext_t ) ( ze_rtas_geometry_aabbs_ext_cb_params_t * params ///< [in] callback function parameters structure ) const ze_rtas_geometry_aabbs_cb_ext_t = Ptr{Cvoid} struct _ze_rtas_builder_procedural_geometry_info_ext_t 
geometryType::ze_rtas_builder_packed_geometry_type_ext_t geometryFlags::ze_rtas_builder_packed_geometry_ext_flags_t geometryMask::UInt8 reserved::UInt8 primCount::UInt32 pfnGetBoundsCb::ze_rtas_geometry_aabbs_cb_ext_t pGeomUserPtr::Ptr{Cvoid} end const ze_rtas_builder_procedural_geometry_info_ext_t = _ze_rtas_builder_procedural_geometry_info_ext_t const ze_rtas_builder_packed_instance_ext_flags_t = UInt8 struct _ze_rtas_builder_instance_geometry_info_ext_t geometryType::ze_rtas_builder_packed_geometry_type_ext_t instanceFlags::ze_rtas_builder_packed_instance_ext_flags_t geometryMask::UInt8 transformFormat::ze_rtas_builder_packed_input_data_format_ext_t instanceUserID::UInt32 pTransform::Ptr{Cvoid} pBounds::Ptr{ze_rtas_aabb_ext_t} pAccelerationStructure::Ptr{Cvoid} end const ze_rtas_builder_instance_geometry_info_ext_t = _ze_rtas_builder_instance_geometry_info_ext_t @cenum _ze_rtas_builder_build_quality_hint_ext_t::UInt32 begin ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXT_LOW = 0 ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXT_MEDIUM = 1 ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXT_HIGH = 2 ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXT_FORCE_UINT32 = 2147483647 end const ze_rtas_builder_build_quality_hint_ext_t = _ze_rtas_builder_build_quality_hint_ext_t const ze_rtas_builder_build_op_ext_flags_t = UInt32 struct _ze_rtas_builder_build_op_ext_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} rtasFormat::ze_rtas_format_ext_t buildQuality::ze_rtas_builder_build_quality_hint_ext_t buildFlags::ze_rtas_builder_build_op_ext_flags_t ppGeometries::Ptr{Ptr{ze_rtas_builder_geometry_info_ext_t}} numGeometries::UInt32 end const ze_rtas_builder_build_op_ext_desc_t = _ze_rtas_builder_build_op_ext_desc_t struct _ze_device_vector_width_properties_ext_t stype::ze_structure_type_t pNext::Ptr{Cvoid} vector_width_size::UInt32 preferred_vector_width_char::UInt32 preferred_vector_width_short::UInt32 preferred_vector_width_int::UInt32 preferred_vector_width_long::UInt32 preferred_vector_width_float::UInt32 
# Tail of _ze_device_vector_width_properties_ext_t (struct header precedes this
# chunk): preferred/native SIMD vector widths reported per element type.
preferred_vector_width_double::UInt32
preferred_vector_width_half::UInt32
native_vector_width_char::UInt32
native_vector_width_short::UInt32
native_vector_width_int::UInt32
native_vector_width_long::UInt32
native_vector_width_float::UInt32
native_vector_width_double::UInt32
native_vector_width_half::UInt32
end

const ze_device_vector_width_properties_ext_t = _ze_device_vector_width_properties_ext_t

# NOTE(review): auto-generated Level Zero bindings. Each struct mirrors the C ABI
# layout of its ze_*_t counterpart exactly (stype/pNext form the extensible
# struct chain) — do not reorder or retype fields by hand.
struct _ze_cache_reservation_ext_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    maxCacheReservationSize::Csize_t
end

const ze_cache_reservation_ext_desc_t = _ze_cache_reservation_ext_desc_t

# Image memory properties (experimental): size and pitches.
struct _ze_image_memory_properties_exp_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    size::UInt64
    rowPitch::UInt64
    slicePitch::UInt64
end

const ze_image_memory_properties_exp_t = _ze_image_memory_properties_exp_t

# Planar image view descriptors; the ext and exp variants have identical layout.
struct _ze_image_view_planar_ext_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    planeIndex::UInt32
end

const ze_image_view_planar_ext_desc_t = _ze_image_view_planar_ext_desc_t

struct _ze_image_view_planar_exp_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    planeIndex::UInt32
end

const ze_image_view_planar_exp_desc_t = _ze_image_view_planar_exp_desc_t

# Kernel scheduling hints (experimental).
const ze_scheduling_hint_exp_flags_t = UInt32

struct _ze_scheduling_hint_exp_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    schedulingHintFlags::ze_scheduling_hint_exp_flags_t
end

const ze_scheduling_hint_exp_properties_t = _ze_scheduling_hint_exp_properties_t

struct _ze_scheduling_hint_exp_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    flags::ze_scheduling_hint_exp_flags_t
end

const ze_scheduling_hint_exp_desc_t = _ze_scheduling_hint_exp_desc_t

# Context power-saving hint (experimental).
struct _ze_context_power_saving_hint_exp_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    hint::UInt32
end

const ze_context_power_saving_hint_exp_desc_t = _ze_context_power_saving_hint_exp_desc_t

# Total execution-unit count extension.
struct _ze_eu_count_ext_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    numTotalEUs::UInt32
end

const ze_eu_count_ext_t = _ze_eu_count_ext_t

# PCI address/speed extension. `_function` keeps the leading underscore because
# `function` is a Julia keyword.
struct _ze_pci_address_ext_t
    domain::UInt32
    bus::UInt32
    device::UInt32
    _function::UInt32
end

const ze_pci_address_ext_t = _ze_pci_address_ext_t

struct _ze_pci_speed_ext_t
    genVersion::Int32
    width::Int32
    maxBandwidth::Int64
end

const ze_pci_speed_ext_t = _ze_pci_speed_ext_t

struct _ze_pci_ext_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    address::ze_pci_address_ext_t
    maxSpeed::ze_pci_speed_ext_t
end

const ze_pci_ext_properties_t = _ze_pci_ext_properties_t

# sRGB image descriptor extension.
struct _ze_srgb_ext_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    sRGB::ze_bool_t
end

const ze_srgb_ext_desc_t = _ze_srgb_ext_desc_t

struct _ze_image_allocation_ext_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    id::UInt64
end

const ze_image_allocation_ext_properties_t = _ze_image_allocation_ext_properties_t

# Module linkage inspection extension.
const ze_linkage_inspection_ext_flags_t = UInt32

struct _ze_linkage_inspection_ext_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    flags::ze_linkage_inspection_ext_flags_t
end

const ze_linkage_inspection_ext_desc_t = _ze_linkage_inspection_ext_desc_t

# Memory compression hints extension.
const ze_memory_compression_hints_ext_flags_t = UInt32

struct _ze_memory_compression_hints_ext_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    flags::ze_memory_compression_hints_ext_flags_t
end

const ze_memory_compression_hints_ext_desc_t = _ze_memory_compression_hints_ext_desc_t

# Memory free policy extension.
const ze_driver_memory_free_policy_ext_flags_t = UInt32

struct _ze_driver_memory_free_ext_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    freePolicies::ze_driver_memory_free_policy_ext_flags_t
end

const ze_driver_memory_free_ext_properties_t = _ze_driver_memory_free_ext_properties_t

struct _ze_memory_free_ext_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    freePolicy::ze_driver_memory_free_policy_ext_flags_t
end

const ze_memory_free_ext_desc_t = _ze_memory_free_ext_desc_t

# Peer-to-peer bandwidth/latency properties (experimental).
struct _ze_device_p2p_bandwidth_exp_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    logicalBandwidth::UInt32
    physicalBandwidth::UInt32
    bandwidthUnit::ze_bandwidth_unit_t
    logicalLatency::UInt32
    physicalLatency::UInt32
    latencyUnit::ze_latency_unit_t
end

const ze_device_p2p_bandwidth_exp_properties_t = _ze_device_p2p_bandwidth_exp_properties_t

struct _ze_copy_bandwidth_exp_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    copyBandwidth::UInt32
    copyBandwidthUnit::ze_bandwidth_unit_t
end

const ze_copy_bandwidth_exp_properties_t = _ze_copy_bandwidth_exp_properties_t

# Device LUID extension: 8-byte locally unique identifier.
struct _ze_device_luid_ext_t
    id::NTuple{8,UInt8}
end

const ze_device_luid_ext_t = _ze_device_luid_ext_t

struct _ze_device_luid_ext_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    luid::ze_device_luid_ext_t
    nodeMask::UInt32
end

const ze_device_luid_ext_properties_t = _ze_device_luid_ext_properties_t

# Fabric topology (experimental): vertices (devices/switches) and edges (links).
struct _ze_fabric_vertex_pci_exp_address_t
    domain::UInt32
    bus::UInt32
    device::UInt32
    _function::UInt32
end

const ze_fabric_vertex_pci_exp_address_t = _ze_fabric_vertex_pci_exp_address_t

@cenum _ze_fabric_vertex_exp_type_t::UInt32 begin
    ZE_FABRIC_VERTEX_EXP_TYPE_UNKNOWN = 0
    ZE_FABRIC_VERTEX_EXP_TYPE_DEVICE = 1
    ZE_FABRIC_VERTEX_EXP_TYPE_SUBDEVICE = 2
    ZE_FABRIC_VERTEX_EXP_TYPE_SWITCH = 3
    ZE_FABRIC_VERTEX_EXP_TYPE_FORCE_UINT32 = 2147483647
end

const ze_fabric_vertex_exp_type_t = _ze_fabric_vertex_exp_type_t

struct _ze_fabric_vertex_exp_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    uuid::ze_uuid_t
    type::ze_fabric_vertex_exp_type_t
    remote::ze_bool_t
    address::ze_fabric_vertex_pci_exp_address_t
end

const ze_fabric_vertex_exp_properties_t = _ze_fabric_vertex_exp_properties_t

@cenum _ze_fabric_edge_exp_duplexity_t::UInt32 begin
    ZE_FABRIC_EDGE_EXP_DUPLEXITY_UNKNOWN = 0
    ZE_FABRIC_EDGE_EXP_DUPLEXITY_HALF_DUPLEX = 1
    ZE_FABRIC_EDGE_EXP_DUPLEXITY_FULL_DUPLEX = 2
    ZE_FABRIC_EDGE_EXP_DUPLEXITY_FORCE_UINT32 = 2147483647
end

const ze_fabric_edge_exp_duplexity_t = _ze_fabric_edge_exp_duplexity_t

struct _ze_fabric_edge_exp_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    uuid::ze_uuid_t
    model::NTuple{256,Cchar}
    bandwidth::UInt32
    bandwidthUnit::ze_bandwidth_unit_t
    latency::UInt32
    latencyUnit::ze_latency_unit_t
    duplexity::ze_fabric_edge_exp_duplexity_t
end

const ze_fabric_edge_exp_properties_t = _ze_fabric_edge_exp_properties_t

# Device memory technology types (extension).
@cenum _ze_device_memory_ext_type_t::UInt32 begin
    ZE_DEVICE_MEMORY_EXT_TYPE_HBM = 0
    ZE_DEVICE_MEMORY_EXT_TYPE_HBM2 = 1
    ZE_DEVICE_MEMORY_EXT_TYPE_DDR = 2
    ZE_DEVICE_MEMORY_EXT_TYPE_DDR2 = 3
    ZE_DEVICE_MEMORY_EXT_TYPE_DDR3 = 4
    ZE_DEVICE_MEMORY_EXT_TYPE_DDR4 = 5
    ZE_DEVICE_MEMORY_EXT_TYPE_DDR5 = 6
    ZE_DEVICE_MEMORY_EXT_TYPE_LPDDR = 7
    ZE_DEVICE_MEMORY_EXT_TYPE_LPDDR3 = 8
    ZE_DEVICE_MEMORY_EXT_TYPE_LPDDR4 = 9
    ZE_DEVICE_MEMORY_EXT_TYPE_LPDDR5 = 10
    ZE_DEVICE_MEMORY_EXT_TYPE_SRAM = 11
    ZE_DEVICE_MEMORY_EXT_TYPE_L1 = 12
    ZE_DEVICE_MEMORY_EXT_TYPE_L3 = 13
    ZE_DEVICE_MEMORY_EXT_TYPE_GRF = 14
    ZE_DEVICE_MEMORY_EXT_TYPE_SLM = 15
    ZE_DEVICE_MEMORY_EXT_TYPE_GDDR4 = 16
    ZE_DEVICE_MEMORY_EXT_TYPE_GDDR5 = 17
    ZE_DEVICE_MEMORY_EXT_TYPE_GDDR5X = 18
    ZE_DEVICE_MEMORY_EXT_TYPE_GDDR6 = 19
    ZE_DEVICE_MEMORY_EXT_TYPE_GDDR6X = 20
    ZE_DEVICE_MEMORY_EXT_TYPE_GDDR7 = 21
    ZE_DEVICE_MEMORY_EXT_TYPE_FORCE_UINT32 = 2147483647
end

const ze_device_memory_ext_type_t = _ze_device_memory_ext_type_t

struct _ze_device_memory_ext_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    type::ze_device_memory_ext_type_t
    physicalSize::UInt64
    readBandwidth::UInt32
    writeBandwidth::UInt32
    bandwidthUnit::ze_bandwidth_unit_t
end

const ze_device_memory_ext_properties_t = _ze_device_memory_ext_properties_t

# Device IP version extension.
struct _ze_device_ip_version_ext_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    ipVersion::UInt32
end

const ze_device_ip_version_ext_t = _ze_device_ip_version_ext_t

# Per-kernel maximum group size extension.
struct _ze_kernel_max_group_size_properties_ext_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    maxGroupSize::UInt32
end

const ze_kernel_max_group_size_properties_ext_t = _ze_kernel_max_group_size_properties_ext_t

# A single sub-allocation: base pointer and size.
struct _ze_sub_allocation_t
    base::Ptr{Cvoid}
    size::Csize_t
end

const ze_sub_allocation_t = _ze_sub_allocation_t

struct
_ze_memory_sub_allocations_exp_properties_t
    # Sub-allocation enumeration (experimental): caller-provided count/array.
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    pCount::Ptr{UInt32}
    pSubAllocations::Ptr{ze_sub_allocation_t}
end

const ze_memory_sub_allocations_exp_properties_t = _ze_memory_sub_allocations_exp_properties_t

# Event kernel-timestamp query extension.
const ze_event_query_kernel_timestamps_ext_flags_t = UInt32

struct _ze_event_query_kernel_timestamps_ext_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    flags::ze_event_query_kernel_timestamps_ext_flags_t
end

const ze_event_query_kernel_timestamps_ext_properties_t = _ze_event_query_kernel_timestamps_ext_properties_t

# Host-synchronized kernel start/end timestamps.
struct _ze_synchronized_timestamp_data_ext_t
    kernelStart::UInt64
    kernelEnd::UInt64
end

const ze_synchronized_timestamp_data_ext_t = _ze_synchronized_timestamp_data_ext_t

# `_global` keeps the leading underscore because `global` is a Julia keyword.
struct _ze_synchronized_timestamp_result_ext_t
    _global::ze_synchronized_timestamp_data_ext_t
    context::ze_synchronized_timestamp_data_ext_t
end

const ze_synchronized_timestamp_result_ext_t = _ze_synchronized_timestamp_result_ext_t

struct _ze_event_query_kernel_timestamps_results_ext_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    pKernelTimestampsBuffer::Ptr{ze_kernel_timestamp_result_t}
    pSynchronizedTimestampsBuffer::Ptr{ze_synchronized_timestamp_result_ext_t}
end

const ze_event_query_kernel_timestamps_results_ext_properties_t = _ze_event_query_kernel_timestamps_results_ext_properties_t

# Ray-tracing acceleration structure (RTAS) builder, experimental API.
@cenum _ze_rtas_builder_exp_version_t::UInt32 begin
    ZE_RTAS_BUILDER_EXP_VERSION_1_0 = 65536
    ZE_RTAS_BUILDER_EXP_VERSION_CURRENT = 65536
    ZE_RTAS_BUILDER_EXP_VERSION_FORCE_UINT32 = 2147483647
end

const ze_rtas_builder_exp_version_t = _ze_rtas_builder_exp_version_t

struct _ze_rtas_builder_exp_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    builderVersion::ze_rtas_builder_exp_version_t
end

const ze_rtas_builder_exp_desc_t = _ze_rtas_builder_exp_desc_t

const ze_rtas_builder_exp_flags_t = UInt32

# Buffer-size requirements reported by the RTAS builder (all in bytes).
struct _ze_rtas_builder_exp_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    flags::ze_rtas_builder_exp_flags_t
    rtasBufferSizeBytesExpected::Csize_t
    rtasBufferSizeBytesMaxRequired::Csize_t
    scratchBufferSizeBytes::Csize_t
end

const ze_rtas_builder_exp_properties_t = _ze_rtas_builder_exp_properties_t

const ze_rtas_parallel_operation_exp_flags_t = UInt32

struct _ze_rtas_parallel_operation_exp_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    flags::ze_rtas_parallel_operation_exp_flags_t
    maxConcurrency::UInt32
end

const ze_rtas_parallel_operation_exp_properties_t = _ze_rtas_parallel_operation_exp_properties_t

const ze_rtas_device_exp_flags_t = UInt32

@cenum _ze_rtas_format_exp_t::UInt32 begin
    ZE_RTAS_FORMAT_EXP_INVALID = 0
    ZE_RTAS_FORMAT_EXP_MAX = 2147483646
    ZE_RTAS_FORMAT_EXP_FORCE_UINT32 = 2147483647
end

const ze_rtas_format_exp_t = _ze_rtas_format_exp_t

struct _ze_rtas_device_exp_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    flags::ze_rtas_device_exp_flags_t
    rtasFormat::ze_rtas_format_exp_t
    rtasBufferAlignment::UInt32
end

const ze_rtas_device_exp_properties_t = _ze_rtas_device_exp_properties_t

# Geometry primitives for RTAS input.
struct _ze_rtas_float3_exp_t
    x::Cfloat
    y::Cfloat
    z::Cfloat
end

const ze_rtas_float3_exp_t = _ze_rtas_float3_exp_t

# 3x4 affine transform, column-major: basis vectors vx/vy/vz plus translation p.
struct _ze_rtas_transform_float3x4_column_major_exp_t
    vx_x::Cfloat
    vx_y::Cfloat
    vx_z::Cfloat
    vy_x::Cfloat
    vy_y::Cfloat
    vy_z::Cfloat
    vz_x::Cfloat
    vz_y::Cfloat
    vz_z::Cfloat
    p_x::Cfloat
    p_y::Cfloat
    p_z::Cfloat
end

const ze_rtas_transform_float3x4_column_major_exp_t = _ze_rtas_transform_float3x4_column_major_exp_t

# Same transform with a padding float after each column (16-byte column stride).
struct _ze_rtas_transform_float3x4_aligned_column_major_exp_t
    vx_x::Cfloat
    vx_y::Cfloat
    vx_z::Cfloat
    pad0::Cfloat
    vy_x::Cfloat
    vy_y::Cfloat
    vy_z::Cfloat
    pad1::Cfloat
    vz_x::Cfloat
    vz_y::Cfloat
    vz_z::Cfloat
    pad2::Cfloat
    p_x::Cfloat
    p_y::Cfloat
    p_z::Cfloat
    pad3::Cfloat
end

const ze_rtas_transform_float3x4_aligned_column_major_exp_t = _ze_rtas_transform_float3x4_aligned_column_major_exp_t

# Same transform stored row-major.
struct _ze_rtas_transform_float3x4_row_major_exp_t
    vx_x::Cfloat
    vy_x::Cfloat
    vz_x::Cfloat
    p_x::Cfloat
    vx_y::Cfloat
    vy_y::Cfloat
    vz_y::Cfloat
    p_y::Cfloat
    vx_z::Cfloat
    vy_z::Cfloat
    vz_z::Cfloat
    p_z::Cfloat
end

const ze_rtas_transform_float3x4_row_major_exp_t = _ze_rtas_transform_float3x4_row_major_exp_t

# Axis-aligned bounding box: lower/upper corners.
struct _ze_rtas_aabb_exp_t
    lower::ze_rtas_float3_exp_t
    upper::ze_rtas_float3_exp_t
end

const ze_rtas_aabb_exp_t = _ze_rtas_aabb_exp_t

struct _ze_rtas_triangle_indices_uint32_exp_t
    v0::UInt32
    v1::UInt32
    v2::UInt32
end

const ze_rtas_triangle_indices_uint32_exp_t = _ze_rtas_triangle_indices_uint32_exp_t

struct _ze_rtas_quad_indices_uint32_exp_t
    v0::UInt32
    v1::UInt32
    v2::UInt32
    v3::UInt32
end

const ze_rtas_quad_indices_uint32_exp_t = _ze_rtas_quad_indices_uint32_exp_t

# "packed" variants are single-byte fields inside geometry-info headers.
const ze_rtas_builder_packed_geometry_type_exp_t = UInt8

# Common header of every geometry-info struct (first byte selects the variant).
struct _ze_rtas_builder_geometry_info_exp_t
    geometryType::ze_rtas_builder_packed_geometry_type_exp_t
end

const ze_rtas_builder_geometry_info_exp_t = _ze_rtas_builder_geometry_info_exp_t

const ze_rtas_builder_packed_geometry_exp_flags_t = UInt8

const ze_rtas_builder_packed_input_data_format_exp_t = UInt8

struct _ze_rtas_builder_triangles_geometry_info_exp_t
    geometryType::ze_rtas_builder_packed_geometry_type_exp_t
    geometryFlags::ze_rtas_builder_packed_geometry_exp_flags_t
    geometryMask::UInt8
    triangleFormat::ze_rtas_builder_packed_input_data_format_exp_t
    vertexFormat::ze_rtas_builder_packed_input_data_format_exp_t
    triangleCount::UInt32
    vertexCount::UInt32
    triangleStride::UInt32
    vertexStride::UInt32
    pTriangleBuffer::Ptr{Cvoid}
    pVertexBuffer::Ptr{Cvoid}
end

const ze_rtas_builder_triangles_geometry_info_exp_t = _ze_rtas_builder_triangles_geometry_info_exp_t

struct _ze_rtas_builder_quads_geometry_info_exp_t
    geometryType::ze_rtas_builder_packed_geometry_type_exp_t
    geometryFlags::ze_rtas_builder_packed_geometry_exp_flags_t
    geometryMask::UInt8
    quadFormat::ze_rtas_builder_packed_input_data_format_exp_t
    vertexFormat::ze_rtas_builder_packed_input_data_format_exp_t
    quadCount::UInt32
    vertexCount::UInt32
    quadStride::UInt32
    vertexStride::UInt32
    pQuadBuffer::Ptr{Cvoid}
    pVertexBuffer::Ptr{Cvoid}
end

const
ze_rtas_builder_quads_geometry_info_exp_t = _ze_rtas_builder_quads_geometry_info_exp_t

# Parameters passed to the user AABB callback for procedural geometry.
struct _ze_rtas_geometry_aabbs_exp_cb_params_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    primID::UInt32
    primIDCount::UInt32
    pGeomUserPtr::Ptr{Cvoid}
    pBuildUserPtr::Ptr{Cvoid}
    pBoundsOut::Ptr{ze_rtas_aabb_exp_t}
end

const ze_rtas_geometry_aabbs_exp_cb_params_t = _ze_rtas_geometry_aabbs_exp_cb_params_t

# C function pointer, opaque on the Julia side:
# typedef void ( * ze_rtas_geometry_aabbs_cb_exp_t ) ( ze_rtas_geometry_aabbs_exp_cb_params_t * params ///< [in] callback function parameters structure )
const ze_rtas_geometry_aabbs_cb_exp_t = Ptr{Cvoid}

struct _ze_rtas_builder_procedural_geometry_info_exp_t
    geometryType::ze_rtas_builder_packed_geometry_type_exp_t
    geometryFlags::ze_rtas_builder_packed_geometry_exp_flags_t
    geometryMask::UInt8
    reserved::UInt8
    primCount::UInt32
    pfnGetBoundsCb::ze_rtas_geometry_aabbs_cb_exp_t
    pGeomUserPtr::Ptr{Cvoid}
end

const ze_rtas_builder_procedural_geometry_info_exp_t = _ze_rtas_builder_procedural_geometry_info_exp_t

const ze_rtas_builder_packed_instance_exp_flags_t = UInt8

struct _ze_rtas_builder_instance_geometry_info_exp_t
    geometryType::ze_rtas_builder_packed_geometry_type_exp_t
    instanceFlags::ze_rtas_builder_packed_instance_exp_flags_t
    geometryMask::UInt8
    transformFormat::ze_rtas_builder_packed_input_data_format_exp_t
    instanceUserID::UInt32
    pTransform::Ptr{Cvoid}
    pBounds::Ptr{ze_rtas_aabb_exp_t}
    pAccelerationStructure::Ptr{Cvoid}
end

const ze_rtas_builder_instance_geometry_info_exp_t = _ze_rtas_builder_instance_geometry_info_exp_t

@cenum _ze_rtas_builder_build_quality_hint_exp_t::UInt32 begin
    ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_LOW = 0
    ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_MEDIUM = 1
    ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_HIGH = 2
    ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXP_FORCE_UINT32 = 2147483647
end

const ze_rtas_builder_build_quality_hint_exp_t = _ze_rtas_builder_build_quality_hint_exp_t

const ze_rtas_builder_build_op_exp_flags_t = UInt32

# Top-level descriptor for an RTAS build operation.
struct _ze_rtas_builder_build_op_exp_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    rtasFormat::ze_rtas_format_exp_t
    buildQuality::ze_rtas_builder_build_quality_hint_exp_t
    buildFlags::ze_rtas_builder_build_op_exp_flags_t
    ppGeometries::Ptr{Ptr{ze_rtas_builder_geometry_info_exp_t}}
    numGeometries::UInt32
end

const ze_rtas_builder_build_op_exp_desc_t = _ze_rtas_builder_build_op_exp_desc_t

# Counter-based event pools (experimental).
const ze_event_pool_counter_based_exp_flags_t = UInt32

struct _ze_event_pool_counter_based_exp_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    flags::ze_event_pool_counter_based_exp_flags_t
end

const ze_event_pool_counter_based_exp_desc_t = _ze_event_pool_counter_based_exp_desc_t

# Bindless/pitched image allocation (experimental).
const ze_image_bindless_exp_flags_t = UInt32

struct _ze_image_bindless_exp_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    flags::ze_image_bindless_exp_flags_t
end

const ze_image_bindless_exp_desc_t = _ze_image_bindless_exp_desc_t

struct _ze_image_pitched_exp_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    ptr::Ptr{Cvoid}
end

const ze_image_pitched_exp_desc_t = _ze_image_pitched_exp_desc_t

struct _ze_device_pitched_alloc_exp_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    maxImageLinearWidth::Csize_t
    maxImageLinearHeight::Csize_t
end

const ze_device_pitched_alloc_exp_properties_t = _ze_device_pitched_alloc_exp_properties_t

# Mutable command lists (experimental): patch kernel arguments, group
# counts/sizes, offsets, or graph arguments of recorded commands by commandId.
const ze_mutable_command_exp_flags_t = UInt32

struct _ze_mutable_command_id_exp_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    flags::ze_mutable_command_exp_flags_t
end

const ze_mutable_command_id_exp_desc_t = _ze_mutable_command_id_exp_desc_t

const ze_mutable_command_list_exp_flags_t = UInt32

struct _ze_mutable_command_list_exp_properties_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    mutableCommandListFlags::ze_mutable_command_list_exp_flags_t
    mutableCommandFlags::ze_mutable_command_exp_flags_t
end

const ze_mutable_command_list_exp_properties_t = _ze_mutable_command_list_exp_properties_t

struct _ze_mutable_command_list_exp_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    flags::ze_mutable_command_list_exp_flags_t
end

const ze_mutable_command_list_exp_desc_t = _ze_mutable_command_list_exp_desc_t

struct _ze_mutable_commands_exp_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    flags::UInt32
end

const ze_mutable_commands_exp_desc_t = _ze_mutable_commands_exp_desc_t

struct _ze_mutable_kernel_argument_exp_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    commandId::UInt64
    argIndex::UInt32
    argSize::Csize_t
    pArgValue::Ptr{Cvoid}
end

const ze_mutable_kernel_argument_exp_desc_t = _ze_mutable_kernel_argument_exp_desc_t

struct _ze_mutable_group_count_exp_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    commandId::UInt64
    pGroupCount::Ptr{ze_group_count_t}
end

const ze_mutable_group_count_exp_desc_t = _ze_mutable_group_count_exp_desc_t

struct _ze_mutable_group_size_exp_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    commandId::UInt64
    groupSizeX::UInt32
    groupSizeY::UInt32
    groupSizeZ::UInt32
end

const ze_mutable_group_size_exp_desc_t = _ze_mutable_group_size_exp_desc_t

struct _ze_mutable_global_offset_exp_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    commandId::UInt64
    offsetX::UInt32
    offsetY::UInt32
    offsetZ::UInt32
end

const ze_mutable_global_offset_exp_desc_t = _ze_mutable_global_offset_exp_desc_t

struct _ze_mutable_graph_argument_exp_desc_t
    stype::ze_structure_type_t
    pNext::Ptr{Cvoid}
    commandId::UInt64
    argIndex::UInt32
    pArgValue::Ptr{Cvoid}
end

const ze_mutable_graph_argument_exp_desc_t = _ze_mutable_graph_argument_exp_desc_t

# --- Entry points. NOTE(review): @checked presumably validates the returned
# ze_result_t and raises on failure — it is defined elsewhere in this module.
const ze_init_flags_t = UInt32

@cenum _ze_init_flag_t::UInt32 begin
    ZE_INIT_FLAG_GPU_ONLY = 1
    ZE_INIT_FLAG_VPU_ONLY = 2
    ZE_INIT_FLAG_FORCE_UINT32 = 2147483647
end

const ze_init_flag_t = _ze_init_flag_t

# Initializes the Level Zero driver(s).
@checked function zeInit(flags)
    @ccall libze_loader.zeInit(flags::ze_init_flags_t)::ze_result_t
end

# Enumerates driver handles (two-call count/fill idiom via pCount).
@checked function zeDriverGet(pCount, phDrivers)
    @ccall libze_loader.zeDriverGet(pCount::Ptr{UInt32},
                                    phDrivers::Ptr{ze_driver_handle_t})::ze_result_t
end

@cenum _ze_init_driver_type_flag_t::UInt32 begin
    ZE_INIT_DRIVER_TYPE_FLAG_GPU = 1
    ZE_INIT_DRIVER_TYPE_FLAG_NPU = 2
    ZE_INIT_DRIVER_TYPE_FLAG_FORCE_UINT32 = 2147483647
end

const ze_init_driver_type_flag_t = _ze_init_driver_type_flag_t

# Newer init entry point (v1.10+) selecting driver types via desc.
@checked function zeInitDrivers(pCount, phDrivers, desc)
    @ccall libze_loader.zeInitDrivers(pCount::Ptr{UInt32},
                                      phDrivers::Ptr{ze_driver_handle_t},
                                      desc::Ptr{ze_init_driver_type_desc_t})::ze_result_t
end

# API versions encoded as (major << 16) | minor, e.g. 65536 == 1.0.
@cenum _ze_api_version_t::UInt32 begin
    ZE_API_VERSION_1_0 = 65536
    ZE_API_VERSION_1_1 = 65537
    ZE_API_VERSION_1_2 = 65538
    ZE_API_VERSION_1_3 = 65539
    ZE_API_VERSION_1_4 = 65540
    ZE_API_VERSION_1_5 = 65541
    ZE_API_VERSION_1_6 = 65542
    ZE_API_VERSION_1_7 = 65543
    ZE_API_VERSION_1_8 = 65544
    ZE_API_VERSION_1_9 = 65545
    ZE_API_VERSION_1_10 = 65546
    ZE_API_VERSION_1_11 = 65547
    ZE_API_VERSION_1_12 = 65548
    ZE_API_VERSION_1_13 = 65549
    ZE_API_VERSION_CURRENT = 65549
    ZE_API_VERSION_FORCE_UINT32 = 2147483647
end

const ze_api_version_t = _ze_api_version_t

@checked function zeDriverGetApiVersion(hDriver, version)
    @ccall libze_loader.zeDriverGetApiVersion(hDriver::ze_driver_handle_t,
                                              version::Ptr{ze_api_version_t})::ze_result_t
end

@checked function zeDriverGetProperties(hDriver, pDriverProperties)
    @ccall libze_loader.zeDriverGetProperties(hDriver::ze_driver_handle_t,
                                              pDriverProperties::Ptr{ze_driver_properties_t})::ze_result_t
end

@cenum _ze_ipc_property_flag_t::UInt32 begin
    ZE_IPC_PROPERTY_FLAG_MEMORY = 1
    ZE_IPC_PROPERTY_FLAG_EVENT_POOL = 2
    ZE_IPC_PROPERTY_FLAG_FORCE_UINT32 = 2147483647
end

const ze_ipc_property_flag_t = _ze_ipc_property_flag_t

@checked function zeDriverGetIpcProperties(hDriver, pIpcProperties)
    @ccall libze_loader.zeDriverGetIpcProperties(hDriver::ze_driver_handle_t,
                                                 pIpcProperties::Ptr{ze_driver_ipc_properties_t})::ze_result_t
end

# Two-call count/fill query of driver extension descriptors.
@checked function zeDriverGetExtensionProperties(hDriver, pCount, pExtensionProperties)
    @ccall libze_loader.zeDriverGetExtensionProperties(hDriver::ze_driver_handle_t,
                                                       pCount::Ptr{UInt32},
                                                       pExtensionProperties::Ptr{ze_driver_extension_properties_t})::ze_result_t
end

# Looks up an extension entry point by name.
@checked function zeDriverGetExtensionFunctionAddress(hDriver, name, ppFunctionAddress)
    @ccall libze_loader.zeDriverGetExtensionFunctionAddress(hDriver::ze_driver_handle_t,
                                                            name::Ptr{Cchar},
                                                            ppFunctionAddress::Ptr{Ptr{Cvoid}})::ze_result_t
end

@checked function zeDriverGetLastErrorDescription(hDriver, ppString)
    @ccall libze_loader.zeDriverGetLastErrorDescription(hDriver::ze_driver_handle_t,
                                                        ppString::Ptr{Ptr{Cchar}})::ze_result_t
end

# Enumerates devices of a driver (two-call count/fill idiom).
@checked function zeDeviceGet(hDriver, pCount, phDevices)
    @ccall libze_loader.zeDeviceGet(hDriver::ze_driver_handle_t,
                                    pCount::Ptr{UInt32},
                                    phDevices::Ptr{ze_device_handle_t})::ze_result_t
end

@checked function zeDeviceGetRootDevice(hDevice, phRootDevice)
    @ccall libze_loader.zeDeviceGetRootDevice(hDevice::ze_device_handle_t,
                                              phRootDevice::Ptr{ze_device_handle_t})::ze_result_t
end

@checked function zeDeviceGetSubDevices(hDevice, pCount, phSubdevices)
    @ccall libze_loader.zeDeviceGetSubDevices(hDevice::ze_device_handle_t,
                                              pCount::Ptr{UInt32},
                                              phSubdevices::Ptr{ze_device_handle_t})::ze_result_t
end

@cenum _ze_device_property_flag_t::UInt32 begin
    ZE_DEVICE_PROPERTY_FLAG_INTEGRATED = 1
    ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE = 2
    ZE_DEVICE_PROPERTY_FLAG_ECC = 4
    ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING = 8
    ZE_DEVICE_PROPERTY_FLAG_FORCE_UINT32 = 2147483647
end

const ze_device_property_flag_t = _ze_device_property_flag_t

@checked function zeDeviceGetProperties(hDevice, pDeviceProperties)
    @ccall libze_loader.zeDeviceGetProperties(hDevice::ze_device_handle_t,
                                              pDeviceProperties::Ptr{ze_device_properties_t})::ze_result_t
end

@checked function zeDeviceGetComputeProperties(hDevice, pComputeProperties)
    @ccall libze_loader.zeDeviceGetComputeProperties(hDevice::ze_device_handle_t,
                                                     pComputeProperties::Ptr{ze_device_compute_properties_t})::ze_result_t
end

@cenum _ze_device_module_flag_t::UInt32 begin
    ZE_DEVICE_MODULE_FLAG_FP16 = 1
    ZE_DEVICE_MODULE_FLAG_FP64 = 2
    ZE_DEVICE_MODULE_FLAG_INT64_ATOMICS = 4
    ZE_DEVICE_MODULE_FLAG_DP4A = 8
    ZE_DEVICE_MODULE_FLAG_FORCE_UINT32 = 2147483647
end

const ze_device_module_flag_t = _ze_device_module_flag_t

# Floating-point capability flags (per fp16/fp32/fp64 in module properties).
@cenum _ze_device_fp_flag_t::UInt32 begin
    ZE_DEVICE_FP_FLAG_DENORM = 1
    ZE_DEVICE_FP_FLAG_INF_NAN = 2
    ZE_DEVICE_FP_FLAG_ROUND_TO_NEAREST = 4
    ZE_DEVICE_FP_FLAG_ROUND_TO_ZERO = 8
    ZE_DEVICE_FP_FLAG_ROUND_TO_INF = 16
    ZE_DEVICE_FP_FLAG_FMA = 32
    ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT = 64
    ZE_DEVICE_FP_FLAG_SOFT_FLOAT = 128
    ZE_DEVICE_FP_FLAG_FORCE_UINT32 = 2147483647
end

const ze_device_fp_flag_t = _ze_device_fp_flag_t

@checked function zeDeviceGetModuleProperties(hDevice, pModuleProperties)
    @ccall libze_loader.zeDeviceGetModuleProperties(hDevice::ze_device_handle_t,
                                                    pModuleProperties::Ptr{ze_device_module_properties_t})::ze_result_t
end

@cenum _ze_command_queue_group_property_flag_t::UInt32 begin
    ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE = 1
    ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY = 2
    ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS = 4
    ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_METRICS = 8
    ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_FORCE_UINT32 = 2147483647
end

const ze_command_queue_group_property_flag_t = _ze_command_queue_group_property_flag_t

@checked function zeDeviceGetCommandQueueGroupProperties(hDevice, pCount,
                                                         pCommandQueueGroupProperties)
    @ccall libze_loader.zeDeviceGetCommandQueueGroupProperties(hDevice::ze_device_handle_t,
                                                               pCount::Ptr{UInt32},
                                                               pCommandQueueGroupProperties::Ptr{ze_command_queue_group_properties_t})::ze_result_t
end

@cenum _ze_device_memory_property_flag_t::UInt32 begin
    ZE_DEVICE_MEMORY_PROPERTY_FLAG_TBD = 1
    ZE_DEVICE_MEMORY_PROPERTY_FLAG_FORCE_UINT32 = 2147483647
end

const ze_device_memory_property_flag_t = _ze_device_memory_property_flag_t

@checked function zeDeviceGetMemoryProperties(hDevice, pCount, pMemProperties)
    @ccall libze_loader.zeDeviceGetMemoryProperties(hDevice::ze_device_handle_t,
                                                    pCount::Ptr{UInt32},
                                                    pMemProperties::Ptr{ze_device_memory_properties_t})::ze_result_t
end

@cenum _ze_memory_access_cap_flag_t::UInt32 begin
    ZE_MEMORY_ACCESS_CAP_FLAG_RW = 1
    ZE_MEMORY_ACCESS_CAP_FLAG_ATOMIC = 2
    ZE_MEMORY_ACCESS_CAP_FLAG_CONCURRENT = 4
    ZE_MEMORY_ACCESS_CAP_FLAG_CONCURRENT_ATOMIC = 8
    ZE_MEMORY_ACCESS_CAP_FLAG_FORCE_UINT32 = 2147483647
end

const ze_memory_access_cap_flag_t = _ze_memory_access_cap_flag_t

@checked function zeDeviceGetMemoryAccessProperties(hDevice, pMemAccessProperties)
    @ccall libze_loader.zeDeviceGetMemoryAccessProperties(hDevice::ze_device_handle_t,
                                                          pMemAccessProperties::Ptr{ze_device_memory_access_properties_t})::ze_result_t
end

@cenum _ze_device_cache_property_flag_t::UInt32 begin
    ZE_DEVICE_CACHE_PROPERTY_FLAG_USER_CONTROL = 1
    ZE_DEVICE_CACHE_PROPERTY_FLAG_FORCE_UINT32 = 2147483647
end

const ze_device_cache_property_flag_t = _ze_device_cache_property_flag_t

@checked function zeDeviceGetCacheProperties(hDevice, pCount, pCacheProperties)
    @ccall libze_loader.zeDeviceGetCacheProperties(hDevice::ze_device_handle_t,
                                                   pCount::Ptr{UInt32},
                                                   pCacheProperties::Ptr{ze_device_cache_properties_t})::ze_result_t
end

@checked function zeDeviceGetImageProperties(hDevice, pImageProperties)
    @ccall libze_loader.zeDeviceGetImageProperties(hDevice::ze_device_handle_t,
                                                   pImageProperties::Ptr{ze_device_image_properties_t})::ze_result_t
end

@checked function zeDeviceGetExternalMemoryProperties(hDevice, pExternalMemoryProperties)
    @ccall libze_loader.zeDeviceGetExternalMemoryProperties(hDevice::ze_device_handle_t,
                                                            pExternalMemoryProperties::Ptr{ze_device_external_memory_properties_t})::ze_result_t
end

@cenum _ze_device_p2p_property_flag_t::UInt32 begin
    ZE_DEVICE_P2P_PROPERTY_FLAG_ACCESS = 1
    ZE_DEVICE_P2P_PROPERTY_FLAG_ATOMICS = 2
    ZE_DEVICE_P2P_PROPERTY_FLAG_FORCE_UINT32 = 2147483647
end

const ze_device_p2p_property_flag_t = _ze_device_p2p_property_flag_t

# Queries peer-to-peer properties between two devices.
@checked function zeDeviceGetP2PProperties(hDevice, hPeerDevice, pP2PProperties)
    @ccall libze_loader.zeDeviceGetP2PProperties(hDevice::ze_device_handle_t,
                                                 hPeerDevice::ze_device_handle_t,
                                                 pP2PProperties::Ptr{ze_device_p2p_properties_t})::ze_result_t
end

@checked function zeDeviceCanAccessPeer(hDevice, hPeerDevice, value)
    @ccall libze_loader.zeDeviceCanAccessPeer(hDevice::ze_device_handle_t,
                                              hPeerDevice::ze_device_handle_t,
                                              value::Ptr{ze_bool_t})::ze_result_t
end

@checked function zeDeviceGetStatus(hDevice)
    @ccall libze_loader.zeDeviceGetStatus(hDevice::ze_device_handle_t)::ze_result_t
end

# Reads correlated host and device timestamps.
@checked function zeDeviceGetGlobalTimestamps(hDevice, hostTimestamp, deviceTimestamp)
    @ccall libze_loader.zeDeviceGetGlobalTimestamps(hDevice::ze_device_handle_t,
                                                    hostTimestamp::Ptr{UInt64},
                                                    deviceTimestamp::Ptr{UInt64})::ze_result_t
end

@cenum _ze_context_flag_t::UInt32 begin
    ZE_CONTEXT_FLAG_TBD = 1
    ZE_CONTEXT_FLAG_FORCE_UINT32 = 2147483647
end

const ze_context_flag_t = _ze_context_flag_t

@checked function zeContextCreate(hDriver, desc, phContext)
    @ccall libze_loader.zeContextCreate(hDriver::ze_driver_handle_t,
                                        desc::Ptr{ze_context_desc_t},
                                        phContext::Ptr{ze_context_handle_t})::ze_result_t
end

# Context creation restricted to an explicit device list.
@checked function zeContextCreateEx(hDriver, desc, numDevices, phDevices, phContext)
    @ccall libze_loader.zeContextCreateEx(hDriver::ze_driver_handle_t,
                                          desc::Ptr{ze_context_desc_t},
                                          numDevices::UInt32,
                                          phDevices::Ptr{ze_device_handle_t},
                                          phContext::Ptr{ze_context_handle_t})::ze_result_t
end

@checked function zeContextDestroy(hContext)
    @ccall libze_loader.zeContextDestroy(hContext::ze_context_handle_t)::ze_result_t
end

@checked function zeContextGetStatus(hContext)
    @ccall libze_loader.zeContextGetStatus(hContext::ze_context_handle_t)::ze_result_t
end

@cenum _ze_command_queue_flag_t::UInt32 begin
    ZE_COMMAND_QUEUE_FLAG_EXPLICIT_ONLY = 1
    ZE_COMMAND_QUEUE_FLAG_IN_ORDER = 2
    ZE_COMMAND_QUEUE_FLAG_FORCE_UINT32 = 2147483647
end

const ze_command_queue_flag_t = _ze_command_queue_flag_t

@checked function zeCommandQueueCreate(hContext, hDevice, desc, phCommandQueue)
    @ccall libze_loader.zeCommandQueueCreate(hContext::ze_context_handle_t,
                                             hDevice::ze_device_handle_t,
                                             desc::Ptr{ze_command_queue_desc_t},
                                             phCommandQueue::Ptr{ze_command_queue_handle_t})::ze_result_t
end

@checked function zeCommandQueueDestroy(hCommandQueue)
    @ccall libze_loader.zeCommandQueueDestroy(hCommandQueue::ze_command_queue_handle_t)::ze_result_t
end

# Submits closed command lists for execution; hFence is optionally signaled.
@checked function zeCommandQueueExecuteCommandLists(hCommandQueue, numCommandLists,
                                                    phCommandLists, hFence)
    @ccall libze_loader.zeCommandQueueExecuteCommandLists(hCommandQueue::ze_command_queue_handle_t,
                                                          numCommandLists::UInt32,
                                                          phCommandLists::Ptr{ze_command_list_handle_t},
                                                          hFence::ze_fence_handle_t)::ze_result_t
end

# Blocks up to `timeout` (nanoseconds as UInt64) for queue completion.
@checked function zeCommandQueueSynchronize(hCommandQueue, timeout)
    @ccall libze_loader.zeCommandQueueSynchronize(hCommandQueue::ze_command_queue_handle_t,
                                                  timeout::UInt64)::ze_result_t
end

@checked function zeCommandQueueGetOrdinal(hCommandQueue, pOrdinal)
    @ccall libze_loader.zeCommandQueueGetOrdinal(hCommandQueue::ze_command_queue_handle_t,
                                                 pOrdinal::Ptr{UInt32})::ze_result_t
end

@checked function zeCommandQueueGetIndex(hCommandQueue, pIndex)
    @ccall libze_loader.zeCommandQueueGetIndex(hCommandQueue::ze_command_queue_handle_t,
                                               pIndex::Ptr{UInt32})::ze_result_t
end

@cenum _ze_command_list_flag_t::UInt32 begin
    ZE_COMMAND_LIST_FLAG_RELAXED_ORDERING = 1
    ZE_COMMAND_LIST_FLAG_MAXIMIZE_THROUGHPUT = 2
    ZE_COMMAND_LIST_FLAG_EXPLICIT_ONLY = 4
    ZE_COMMAND_LIST_FLAG_IN_ORDER = 8
    ZE_COMMAND_LIST_FLAG_EXP_CLONEABLE = 16
    ZE_COMMAND_LIST_FLAG_FORCE_UINT32 = 2147483647
end

const ze_command_list_flag_t = _ze_command_list_flag_t

@checked function zeCommandListCreate(hContext, hDevice, desc, phCommandList)
    @ccall libze_loader.zeCommandListCreate(hContext::ze_context_handle_t,
                                            hDevice::ze_device_handle_t,
                                            desc::Ptr{ze_command_list_desc_t},
                                            phCommandList::Ptr{ze_command_list_handle_t})::ze_result_t
end

# Immediate command lists take a queue descriptor and execute as appended.
@checked function zeCommandListCreateImmediate(hContext, hDevice, altdesc, phCommandList)
    @ccall libze_loader.zeCommandListCreateImmediate(hContext::ze_context_handle_t,
                                                     hDevice::ze_device_handle_t,
                                                     altdesc::Ptr{ze_command_queue_desc_t},
phCommandList::Ptr{ze_command_list_handle_t})::ze_result_t
end

@checked function zeCommandListDestroy(hCommandList)
    @ccall libze_loader.zeCommandListDestroy(hCommandList::ze_command_list_handle_t)::ze_result_t
end

# Closes a command list, making it ready for queue submission.
@checked function zeCommandListClose(hCommandList)
    @ccall libze_loader.zeCommandListClose(hCommandList::ze_command_list_handle_t)::ze_result_t
end

@checked function zeCommandListReset(hCommandList)
    @ccall libze_loader.zeCommandListReset(hCommandList::ze_command_list_handle_t)::ze_result_t
end

@checked function zeCommandListAppendWriteGlobalTimestamp(hCommandList, dstptr,
                                                          hSignalEvent, numWaitEvents,
                                                          phWaitEvents)
    @ccall libze_loader.zeCommandListAppendWriteGlobalTimestamp(hCommandList::ze_command_list_handle_t,
                                                                dstptr::Ptr{UInt64},
                                                                hSignalEvent::ze_event_handle_t,
                                                                numWaitEvents::UInt32,
                                                                phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t
end

# Blocks up to `timeout` (nanoseconds as UInt64) for an immediate list to drain.
@checked function zeCommandListHostSynchronize(hCommandList, timeout)
    @ccall libze_loader.zeCommandListHostSynchronize(hCommandList::ze_command_list_handle_t,
                                                     timeout::UInt64)::ze_result_t
end

@checked function zeCommandListGetDeviceHandle(hCommandList, phDevice)
    @ccall libze_loader.zeCommandListGetDeviceHandle(hCommandList::ze_command_list_handle_t,
                                                     phDevice::Ptr{ze_device_handle_t})::ze_result_t
end

@checked function zeCommandListGetContextHandle(hCommandList, phContext)
    @ccall libze_loader.zeCommandListGetContextHandle(hCommandList::ze_command_list_handle_t,
                                                      phContext::Ptr{ze_context_handle_t})::ze_result_t
end

@checked function zeCommandListGetOrdinal(hCommandList, pOrdinal)
    @ccall libze_loader.zeCommandListGetOrdinal(hCommandList::ze_command_list_handle_t,
                                                pOrdinal::Ptr{UInt32})::ze_result_t
end

@checked function zeCommandListImmediateGetIndex(hCommandListImmediate, pIndex)
    @ccall libze_loader.zeCommandListImmediateGetIndex(hCommandListImmediate::ze_command_list_handle_t,
                                                       pIndex::Ptr{UInt32})::ze_result_t
end

@checked function zeCommandListIsImmediate(hCommandList, pIsImmediate)
    @ccall libze_loader.zeCommandListIsImmediate(hCommandList::ze_command_list_handle_t,
                                                 pIsImmediate::Ptr{ze_bool_t})::ze_result_t
end

# Appends an execution/memory barrier; waits on phWaitEvents, signals hSignalEvent.
@checked function zeCommandListAppendBarrier(hCommandList, hSignalEvent, numWaitEvents,
                                             phWaitEvents)
    @ccall libze_loader.zeCommandListAppendBarrier(hCommandList::ze_command_list_handle_t,
                                                   hSignalEvent::ze_event_handle_t,
                                                   numWaitEvents::UInt32,
                                                   phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t
end

@checked function zeCommandListAppendMemoryRangesBarrier(hCommandList, numRanges,
                                                         pRangeSizes, pRanges,
                                                         hSignalEvent, numWaitEvents,
                                                         phWaitEvents)
    @ccall libze_loader.zeCommandListAppendMemoryRangesBarrier(hCommandList::ze_command_list_handle_t,
                                                               numRanges::UInt32,
                                                               pRangeSizes::Ptr{Csize_t},
                                                               pRanges::Ptr{Ptr{Cvoid}},
                                                               hSignalEvent::ze_event_handle_t,
                                                               numWaitEvents::UInt32,
                                                               phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t
end

@checked function zeContextSystemBarrier(hContext, hDevice)
    @ccall libze_loader.zeContextSystemBarrier(hContext::ze_context_handle_t,
                                               hDevice::ze_device_handle_t)::ze_result_t
end

# PtrOrZePtr{Cvoid} (declared elsewhere in this module) accepts both host Ptr
# and device ZePtr arguments for copy/fill sources and destinations.
@checked function zeCommandListAppendMemoryCopy(hCommandList, dstptr, srcptr, size,
                                                hSignalEvent, numWaitEvents, phWaitEvents)
    @ccall libze_loader.zeCommandListAppendMemoryCopy(hCommandList::ze_command_list_handle_t,
                                                      dstptr::PtrOrZePtr{Cvoid},
                                                      srcptr::PtrOrZePtr{Cvoid},
                                                      size::Csize_t,
                                                      hSignalEvent::ze_event_handle_t,
                                                      numWaitEvents::UInt32,
                                                      phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t
end

@checked function zeCommandListAppendMemoryFill(hCommandList, ptr, pattern, pattern_size,
                                                size, hSignalEvent, numWaitEvents,
                                                phWaitEvents)
    @ccall libze_loader.zeCommandListAppendMemoryFill(hCommandList::ze_command_list_handle_t,
                                                      ptr::PtrOrZePtr{Cvoid},
                                                      pattern::PtrOrZePtr{Cvoid},
                                                      pattern_size::Csize_t,
                                                      size::Csize_t,
                                                      hSignalEvent::ze_event_handle_t,
                                                      numWaitEvents::UInt32,
                                                      phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t
end

# Region copy between pitched 2D/3D allocations.
@checked function zeCommandListAppendMemoryCopyRegion(hCommandList, dstptr, dstRegion,
                                                      dstPitch, dstSlicePitch, srcptr,
                                                      srcRegion, srcPitch, srcSlicePitch,
                                                      hSignalEvent, numWaitEvents,
                                                      phWaitEvents)
    @ccall libze_loader.zeCommandListAppendMemoryCopyRegion(hCommandList::ze_command_list_handle_t,
                                                            dstptr::PtrOrZePtr{Cvoid},
                                                            dstRegion::Ptr{ze_copy_region_t},
                                                            dstPitch::UInt32,
                                                            dstSlicePitch::UInt32,
                                                            srcptr::PtrOrZePtr{Cvoid},
                                                            srcRegion::Ptr{ze_copy_region_t},
                                                            srcPitch::UInt32,
                                                            srcSlicePitch::UInt32,
                                                            hSignalEvent::ze_event_handle_t,
                                                            numWaitEvents::UInt32,
                                                            phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t
end

# Copy where the source allocation belongs to a different context.
@checked function zeCommandListAppendMemoryCopyFromContext(hCommandList, dstptr,
                                                           hContextSrc, srcptr, size,
                                                           hSignalEvent, numWaitEvents,
                                                           phWaitEvents)
    @ccall libze_loader.zeCommandListAppendMemoryCopyFromContext(hCommandList::ze_command_list_handle_t,
                                                                 dstptr::PtrOrZePtr{Cvoid},
                                                                 hContextSrc::ze_context_handle_t,
                                                                 srcptr::PtrOrZePtr{Cvoid},
                                                                 size::Csize_t,
                                                                 hSignalEvent::ze_event_handle_t,
                                                                 numWaitEvents::UInt32,
                                                                 phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t
end

@checked function zeCommandListAppendImageCopy(hCommandList, hDstImage, hSrcImage,
                                               hSignalEvent, numWaitEvents, phWaitEvents)
    @ccall libze_loader.zeCommandListAppendImageCopy(hCommandList::ze_command_list_handle_t,
                                                     hDstImage::ze_image_handle_t,
                                                     hSrcImage::ze_image_handle_t,
                                                     hSignalEvent::ze_event_handle_t,
                                                     numWaitEvents::UInt32,
                                                     phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t
end

@checked function zeCommandListAppendImageCopyRegion(hCommandList, hDstImage, hSrcImage,
                                                     pDstRegion, pSrcRegion, hSignalEvent,
                                                     numWaitEvents, phWaitEvents)
    @ccall libze_loader.zeCommandListAppendImageCopyRegion(hCommandList::ze_command_list_handle_t,
                                                           hDstImage::ze_image_handle_t,
                                                           hSrcImage::ze_image_handle_t,
                                                           pDstRegion::Ptr{ze_image_region_t},
                                                           pSrcRegion::Ptr{ze_image_region_t},
                                                           hSignalEvent::ze_event_handle_t,
                                                           numWaitEvents::UInt32,
                                                           phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t
end

@checked function zeCommandListAppendImageCopyToMemory(hCommandList, dstptr, hSrcImage,
                                                       pSrcRegion, hSignalEvent,
                                                       numWaitEvents, phWaitEvents)
    @ccall
libze_loader.zeCommandListAppendImageCopyToMemory(hCommandList::ze_command_list_handle_t, dstptr::Ptr{Cvoid}, hSrcImage::ze_image_handle_t, pSrcRegion::Ptr{ze_image_region_t}, hSignalEvent::ze_event_handle_t, numWaitEvents::UInt32, phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t end @checked function zeCommandListAppendImageCopyFromMemory(hCommandList, hDstImage, srcptr, pDstRegion, hSignalEvent, numWaitEvents, phWaitEvents) @ccall libze_loader.zeCommandListAppendImageCopyFromMemory(hCommandList::ze_command_list_handle_t, hDstImage::ze_image_handle_t, srcptr::Ptr{Cvoid}, pDstRegion::Ptr{ze_image_region_t}, hSignalEvent::ze_event_handle_t, numWaitEvents::UInt32, phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t end @checked function zeCommandListAppendMemoryPrefetch(hCommandList, ptr, size) @ccall libze_loader.zeCommandListAppendMemoryPrefetch(hCommandList::ze_command_list_handle_t, ptr::PtrOrZePtr{Cvoid}, size::Csize_t)::ze_result_t end @cenum _ze_memory_advice_t::UInt32 begin ZE_MEMORY_ADVICE_SET_READ_MOSTLY = 0 ZE_MEMORY_ADVICE_CLEAR_READ_MOSTLY = 1 ZE_MEMORY_ADVICE_SET_PREFERRED_LOCATION = 2 ZE_MEMORY_ADVICE_CLEAR_PREFERRED_LOCATION = 3 ZE_MEMORY_ADVICE_SET_NON_ATOMIC_MOSTLY = 4 ZE_MEMORY_ADVICE_CLEAR_NON_ATOMIC_MOSTLY = 5 ZE_MEMORY_ADVICE_BIAS_CACHED = 6 ZE_MEMORY_ADVICE_BIAS_UNCACHED = 7 ZE_MEMORY_ADVICE_SET_SYSTEM_MEMORY_PREFERRED_LOCATION = 8 ZE_MEMORY_ADVICE_CLEAR_SYSTEM_MEMORY_PREFERRED_LOCATION = 9 ZE_MEMORY_ADVICE_FORCE_UINT32 = 2147483647 end const ze_memory_advice_t = _ze_memory_advice_t @checked function zeCommandListAppendMemAdvise(hCommandList, hDevice, ptr, size, advice) @ccall libze_loader.zeCommandListAppendMemAdvise(hCommandList::ze_command_list_handle_t, hDevice::ze_device_handle_t, ptr::PtrOrZePtr{Cvoid}, size::Csize_t, advice::ze_memory_advice_t)::ze_result_t end @cenum _ze_event_pool_flag_t::UInt32 begin ZE_EVENT_POOL_FLAG_HOST_VISIBLE = 1 ZE_EVENT_POOL_FLAG_IPC = 2 ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP = 4 
    ZE_EVENT_POOL_FLAG_KERNEL_MAPPED_TIMESTAMP = 8
    ZE_EVENT_POOL_FLAG_FORCE_UINT32 = 2147483647
end

const ze_event_pool_flag_t = _ze_event_pool_flag_t

# NOTE(review): auto-generated checked wrappers forwarding to `libze_loader`
# via `@ccall`; see the oneAPI Level Zero spec for parameter semantics.

@checked function zeEventPoolCreate(hContext, desc, numDevices, phDevices, phEventPool)
    @ccall libze_loader.zeEventPoolCreate(hContext::ze_context_handle_t, desc::Ptr{ze_event_pool_desc_t}, numDevices::UInt32, phDevices::Ptr{ze_device_handle_t}, phEventPool::Ptr{ze_event_pool_handle_t})::ze_result_t
end

@checked function zeEventPoolDestroy(hEventPool)
    @ccall libze_loader.zeEventPoolDestroy(hEventPool::ze_event_pool_handle_t)::ze_result_t
end

# Visibility scope flags used when creating events.
@cenum _ze_event_scope_flag_t::UInt32 begin
    ZE_EVENT_SCOPE_FLAG_SUBDEVICE = 1
    ZE_EVENT_SCOPE_FLAG_DEVICE = 2
    ZE_EVENT_SCOPE_FLAG_HOST = 4
    ZE_EVENT_SCOPE_FLAG_FORCE_UINT32 = 2147483647
end

const ze_event_scope_flag_t = _ze_event_scope_flag_t

# --- Events -------------------------------------------------------------

@checked function zeEventCreate(hEventPool, desc, phEvent)
    @ccall libze_loader.zeEventCreate(hEventPool::ze_event_pool_handle_t, desc::Ptr{ze_event_desc_t}, phEvent::Ptr{ze_event_handle_t})::ze_result_t
end

@checked function zeEventDestroy(hEvent)
    @ccall libze_loader.zeEventDestroy(hEvent::ze_event_handle_t)::ze_result_t
end

# --- Event pool IPC handles --------------------------------------------

@checked function zeEventPoolGetIpcHandle(hEventPool, phIpc)
    @ccall libze_loader.zeEventPoolGetIpcHandle(hEventPool::ze_event_pool_handle_t, phIpc::Ptr{ze_ipc_event_pool_handle_t})::ze_result_t
end

@checked function zeEventPoolPutIpcHandle(hContext, hIpc)
    @ccall libze_loader.zeEventPoolPutIpcHandle(hContext::ze_context_handle_t, hIpc::ze_ipc_event_pool_handle_t)::ze_result_t
end

@checked function zeEventPoolOpenIpcHandle(hContext, hIpc, phEventPool)
    @ccall libze_loader.zeEventPoolOpenIpcHandle(hContext::ze_context_handle_t, hIpc::ze_ipc_event_pool_handle_t, phEventPool::Ptr{ze_event_pool_handle_t})::ze_result_t
end

@checked function zeEventPoolCloseIpcHandle(hEventPool)
    @ccall libze_loader.zeEventPoolCloseIpcHandle(hEventPool::ze_event_pool_handle_t)::ze_result_t
end

# --- Event signalling / synchronization --------------------------------

@checked function zeCommandListAppendSignalEvent(hCommandList, hEvent)
    @ccall libze_loader.zeCommandListAppendSignalEvent(hCommandList::ze_command_list_handle_t, hEvent::ze_event_handle_t)::ze_result_t
end

@checked function zeCommandListAppendWaitOnEvents(hCommandList, numEvents, phEvents)
    @ccall libze_loader.zeCommandListAppendWaitOnEvents(hCommandList::ze_command_list_handle_t, numEvents::UInt32, phEvents::Ptr{ze_event_handle_t})::ze_result_t
end

@checked function zeEventHostSignal(hEvent)
    @ccall libze_loader.zeEventHostSignal(hEvent::ze_event_handle_t)::ze_result_t
end

# `timeout` is in nanoseconds per the Level Zero spec -- TODO confirm at use sites.
@checked function zeEventHostSynchronize(hEvent, timeout)
    @ccall libze_loader.zeEventHostSynchronize(hEvent::ze_event_handle_t, timeout::UInt64)::ze_result_t
end

@checked function zeEventQueryStatus(hEvent)
    @ccall libze_loader.zeEventQueryStatus(hEvent::ze_event_handle_t)::ze_result_t
end

@checked function zeCommandListAppendEventReset(hCommandList, hEvent)
    @ccall libze_loader.zeCommandListAppendEventReset(hCommandList::ze_command_list_handle_t, hEvent::ze_event_handle_t)::ze_result_t
end

@checked function zeEventHostReset(hEvent)
    @ccall libze_loader.zeEventHostReset(hEvent::ze_event_handle_t)::ze_result_t
end

# --- Kernel timestamps / event queries ---------------------------------

@checked function zeEventQueryKernelTimestamp(hEvent, dstptr)
    @ccall libze_loader.zeEventQueryKernelTimestamp(hEvent::ze_event_handle_t, dstptr::Ptr{ze_kernel_timestamp_result_t})::ze_result_t
end

@checked function zeCommandListAppendQueryKernelTimestamps(hCommandList, numEvents, phEvents, dstptr, pOffsets, hSignalEvent, numWaitEvents, phWaitEvents)
    @ccall libze_loader.zeCommandListAppendQueryKernelTimestamps(hCommandList::ze_command_list_handle_t, numEvents::UInt32, phEvents::Ptr{ze_event_handle_t}, dstptr::Ptr{Cvoid}, pOffsets::Ptr{Csize_t}, hSignalEvent::ze_event_handle_t, numWaitEvents::UInt32, phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t
end

@checked function zeEventGetEventPool(hEvent, phEventPool)
    @ccall libze_loader.zeEventGetEventPool(hEvent::ze_event_handle_t, phEventPool::Ptr{ze_event_pool_handle_t})::ze_result_t
end

@checked function zeEventGetSignalScope(hEvent, pSignalScope)
    @ccall libze_loader.zeEventGetSignalScope(hEvent::ze_event_handle_t, pSignalScope::Ptr{ze_event_scope_flags_t})::ze_result_t
end

@checked function zeEventGetWaitScope(hEvent, pWaitScope)
    @ccall libze_loader.zeEventGetWaitScope(hEvent::ze_event_handle_t, pWaitScope::Ptr{ze_event_scope_flags_t})::ze_result_t
end

@checked function zeEventPoolGetContextHandle(hEventPool, phContext)
    @ccall libze_loader.zeEventPoolGetContextHandle(hEventPool::ze_event_pool_handle_t, phContext::Ptr{ze_context_handle_t})::ze_result_t
end

@checked function zeEventPoolGetFlags(hEventPool, pFlags)
    @ccall libze_loader.zeEventPoolGetFlags(hEventPool::ze_event_pool_handle_t, pFlags::Ptr{ze_event_pool_flags_t})::ze_result_t
end

# --- Fences -------------------------------------------------------------

@cenum _ze_fence_flag_t::UInt32 begin
    ZE_FENCE_FLAG_SIGNALED = 1
    ZE_FENCE_FLAG_FORCE_UINT32 = 2147483647
end

const ze_fence_flag_t = _ze_fence_flag_t

@checked function zeFenceCreate(hCommandQueue, desc, phFence)
    @ccall libze_loader.zeFenceCreate(hCommandQueue::ze_command_queue_handle_t, desc::Ptr{ze_fence_desc_t}, phFence::Ptr{ze_fence_handle_t})::ze_result_t
end

@checked function zeFenceDestroy(hFence)
    @ccall libze_loader.zeFenceDestroy(hFence::ze_fence_handle_t)::ze_result_t
end

@checked function zeFenceHostSynchronize(hFence, timeout)
    @ccall libze_loader.zeFenceHostSynchronize(hFence::ze_fence_handle_t, timeout::UInt64)::ze_result_t
end

@checked function zeFenceQueryStatus(hFence)
    @ccall libze_loader.zeFenceQueryStatus(hFence::ze_fence_handle_t)::ze_result_t
end

@checked function zeFenceReset(hFence)
    @ccall libze_loader.zeFenceReset(hFence::ze_fence_handle_t)::ze_result_t
end

# --- Image flags --------------------------------------------------------

@cenum _ze_image_flag_t::UInt32 begin
    ZE_IMAGE_FLAG_KERNEL_WRITE = 1
    ZE_IMAGE_FLAG_BIAS_UNCACHED = 2
    ZE_IMAGE_FLAG_FORCE_UINT32 = 2147483647
end

const ze_image_flag_t = _ze_image_flag_t

# Sampler filter flags (continues on the following source line).
@cenum _ze_image_sampler_filter_flag_t::UInt32 begin
    ZE_IMAGE_SAMPLER_FILTER_FLAG_POINT = 1
    ZE_IMAGE_SAMPLER_FILTER_FLAG_LINEAR = 2
    ZE_IMAGE_SAMPLER_FILTER_FLAG_FORCE_UINT32 = 2147483647
end

const ze_image_sampler_filter_flag_t = _ze_image_sampler_filter_flag_t

# NOTE(review): auto-generated checked wrappers forwarding to `libze_loader`
# via `@ccall`; see the oneAPI Level Zero spec for parameter semantics.

# --- Images -------------------------------------------------------------

@checked function zeImageGetProperties(hDevice, desc, pImageProperties)
    @ccall libze_loader.zeImageGetProperties(hDevice::ze_device_handle_t, desc::Ptr{ze_image_desc_t}, pImageProperties::Ptr{ze_image_properties_t})::ze_result_t
end

@checked function zeImageCreate(hContext, hDevice, desc, phImage)
    @ccall libze_loader.zeImageCreate(hContext::ze_context_handle_t, hDevice::ze_device_handle_t, desc::Ptr{ze_image_desc_t}, phImage::Ptr{ze_image_handle_t})::ze_result_t
end

@checked function zeImageDestroy(hImage)
    @ccall libze_loader.zeImageDestroy(hImage::ze_image_handle_t)::ze_result_t
end

# --- Memory allocation --------------------------------------------------

@cenum _ze_device_mem_alloc_flag_t::UInt32 begin
    ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_CACHED = 1
    ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED = 2
    ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_INITIAL_PLACEMENT = 4
    ZE_DEVICE_MEM_ALLOC_FLAG_FORCE_UINT32 = 2147483647
end

const ze_device_mem_alloc_flag_t = _ze_device_mem_alloc_flag_t

@cenum _ze_host_mem_alloc_flag_t::UInt32 begin
    ZE_HOST_MEM_ALLOC_FLAG_BIAS_CACHED = 1
    ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED = 2
    ZE_HOST_MEM_ALLOC_FLAG_BIAS_WRITE_COMBINED = 4
    ZE_HOST_MEM_ALLOC_FLAG_BIAS_INITIAL_PLACEMENT = 8
    ZE_HOST_MEM_ALLOC_FLAG_FORCE_UINT32 = 2147483647
end

const ze_host_mem_alloc_flag_t = _ze_host_mem_alloc_flag_t

# Shared (host+device accessible) allocation; result written to `pptr`.
@checked function zeMemAllocShared(hContext, device_desc, host_desc, size, alignment, hDevice, pptr)
    @ccall libze_loader.zeMemAllocShared(hContext::ze_context_handle_t, device_desc::Ptr{ze_device_mem_alloc_desc_t}, host_desc::Ptr{ze_host_mem_alloc_desc_t}, size::Csize_t, alignment::Csize_t, hDevice::ze_device_handle_t, pptr::Ptr{Ptr{Cvoid}})::ze_result_t
end

@checked function zeMemAllocDevice(hContext, device_desc, size, alignment, hDevice, pptr)
    @ccall libze_loader.zeMemAllocDevice(hContext::ze_context_handle_t, device_desc::Ptr{ze_device_mem_alloc_desc_t}, size::Csize_t, alignment::Csize_t, hDevice::ze_device_handle_t, pptr::Ptr{Ptr{Cvoid}})::ze_result_t
end

@checked function zeMemAllocHost(hContext, host_desc, size, alignment, pptr)
    @ccall libze_loader.zeMemAllocHost(hContext::ze_context_handle_t, host_desc::Ptr{ze_host_mem_alloc_desc_t}, size::Csize_t, alignment::Csize_t, pptr::Ptr{Ptr{Cvoid}})::ze_result_t
end

@checked function zeMemFree(hContext, ptr)
    @ccall libze_loader.zeMemFree(hContext::ze_context_handle_t, ptr::PtrOrZePtr{Cvoid})::ze_result_t
end

# --- Allocation introspection ------------------------------------------

@checked function zeMemGetAllocProperties(hContext, ptr, pMemAllocProperties, phDevice)
    @ccall libze_loader.zeMemGetAllocProperties(hContext::ze_context_handle_t, ptr::PtrOrZePtr{Cvoid}, pMemAllocProperties::Ptr{ze_memory_allocation_properties_t}, phDevice::Ptr{ze_device_handle_t})::ze_result_t
end

@checked function zeMemGetAddressRange(hContext, ptr, pBase, pSize)
    @ccall libze_loader.zeMemGetAddressRange(hContext::ze_context_handle_t, ptr::PtrOrZePtr{Cvoid}, pBase::Ptr{Ptr{Cvoid}}, pSize::Ptr{Csize_t})::ze_result_t
end

# --- Memory IPC handles -------------------------------------------------

@checked function zeMemGetIpcHandle(hContext, ptr, pIpcHandle)
    @ccall libze_loader.zeMemGetIpcHandle(hContext::ze_context_handle_t, ptr::PtrOrZePtr{Cvoid}, pIpcHandle::Ptr{ze_ipc_mem_handle_t})::ze_result_t
end

@checked function zeMemGetIpcHandleFromFileDescriptorExp(hContext, handle, pIpcHandle)
    @ccall libze_loader.zeMemGetIpcHandleFromFileDescriptorExp(hContext::ze_context_handle_t, handle::UInt64, pIpcHandle::Ptr{ze_ipc_mem_handle_t})::ze_result_t
end

@checked function zeMemGetFileDescriptorFromIpcHandleExp(hContext, ipcHandle, pHandle)
    @ccall libze_loader.zeMemGetFileDescriptorFromIpcHandleExp(hContext::ze_context_handle_t, ipcHandle::ze_ipc_mem_handle_t, pHandle::Ptr{UInt64})::ze_result_t
end

@checked function zeMemPutIpcHandle(hContext, handle)
    @ccall libze_loader.zeMemPutIpcHandle(hContext::ze_context_handle_t, handle::ze_ipc_mem_handle_t)::ze_result_t
end

# Plain-integer flags type paired with the enum below.
const ze_ipc_memory_flags_t = UInt32

@cenum _ze_ipc_memory_flag_t::UInt32 begin
    ZE_IPC_MEMORY_FLAG_BIAS_CACHED = 1
    ZE_IPC_MEMORY_FLAG_BIAS_UNCACHED = 2
    ZE_IPC_MEMORY_FLAG_FORCE_UINT32 = 2147483647
end

const ze_ipc_memory_flag_t = _ze_ipc_memory_flag_t

@checked function zeMemOpenIpcHandle(hContext, hDevice, handle, flags, pptr)
    @ccall libze_loader.zeMemOpenIpcHandle(hContext::ze_context_handle_t, hDevice::ze_device_handle_t, handle::ze_ipc_mem_handle_t, flags::ze_ipc_memory_flags_t, pptr::Ptr{PtrOrZePtr{Cvoid}})::ze_result_t
end

@checked function zeMemCloseIpcHandle(hContext, ptr)
    @ccall libze_loader.zeMemCloseIpcHandle(hContext::ze_context_handle_t, ptr::PtrOrZePtr{Cvoid})::ze_result_t
end

# --- Atomic access attributes (experimental) ---------------------------

const ze_memory_atomic_attr_exp_flags_t = UInt32

@cenum _ze_memory_atomic_attr_exp_flag_t::UInt32 begin
    ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_NO_ATOMICS = 1
    ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_NO_HOST_ATOMICS = 2
    ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_HOST_ATOMICS = 4
    ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_NO_DEVICE_ATOMICS = 8
    ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_DEVICE_ATOMICS = 16
    ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_NO_SYSTEM_ATOMICS = 32
    ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_SYSTEM_ATOMICS = 64
    ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_FORCE_UINT32 = 2147483647
end

const ze_memory_atomic_attr_exp_flag_t = _ze_memory_atomic_attr_exp_flag_t

@checked function zeMemSetAtomicAccessAttributeExp(hContext, hDevice, ptr, size, attr)
    @ccall libze_loader.zeMemSetAtomicAccessAttributeExp(hContext::ze_context_handle_t, hDevice::ze_device_handle_t, ptr::Ptr{Cvoid}, size::Csize_t, attr::ze_memory_atomic_attr_exp_flags_t)::ze_result_t
end

@checked function zeMemGetAtomicAccessAttributeExp(hContext, hDevice, ptr, size, pAttr)
    @ccall libze_loader.zeMemGetAtomicAccessAttributeExp(hContext::ze_context_handle_t, hDevice::ze_device_handle_t, ptr::Ptr{Cvoid}, size::Csize_t, pAttr::Ptr{ze_memory_atomic_attr_exp_flags_t})::ze_result_t
end

# --- Modules ------------------------------------------------------------

# zeModuleCreate: ccall continues on the following source line.
@checked function zeModuleCreate(hContext, hDevice, desc, phModule, phBuildLog)
    @ccall libze_loader.zeModuleCreate(hContext::ze_context_handle_t, hDevice::ze_device_handle_t, desc::Ptr{ze_module_desc_t},
phModule::Ptr{ze_module_handle_t}, phBuildLog::Ptr{ze_module_build_log_handle_t})::ze_result_t end

# NOTE(review): auto-generated checked wrappers forwarding to `libze_loader`
# via `@ccall`; see the oneAPI Level Zero spec for parameter semantics.

@checked function zeModuleDestroy(hModule)
    @ccall libze_loader.zeModuleDestroy(hModule::ze_module_handle_t)::ze_result_t
end

@checked function zeModuleDynamicLink(numModules, phModules, phLinkLog)
    @ccall libze_loader.zeModuleDynamicLink(numModules::UInt32, phModules::Ptr{ze_module_handle_t}, phLinkLog::Ptr{ze_module_build_log_handle_t})::ze_result_t
end

# --- Build logs / module introspection ---------------------------------

@checked function zeModuleBuildLogDestroy(hModuleBuildLog)
    @ccall libze_loader.zeModuleBuildLogDestroy(hModuleBuildLog::ze_module_build_log_handle_t)::ze_result_t
end

# Size-query pattern: pass pBuildLog == C_NULL to retrieve the needed size
# in pSize first -- presumed from the spec's convention; confirm at call sites.
@checked function zeModuleBuildLogGetString(hModuleBuildLog, pSize, pBuildLog)
    @ccall libze_loader.zeModuleBuildLogGetString(hModuleBuildLog::ze_module_build_log_handle_t, pSize::Ptr{Csize_t}, pBuildLog::Ptr{Cchar})::ze_result_t
end

@checked function zeModuleGetNativeBinary(hModule, pSize, pModuleNativeBinary)
    @ccall libze_loader.zeModuleGetNativeBinary(hModule::ze_module_handle_t, pSize::Ptr{Csize_t}, pModuleNativeBinary::Ptr{UInt8})::ze_result_t
end

@checked function zeModuleGetGlobalPointer(hModule, pGlobalName, pSize, pptr)
    @ccall libze_loader.zeModuleGetGlobalPointer(hModule::ze_module_handle_t, pGlobalName::Ptr{Cchar}, pSize::Ptr{Csize_t}, pptr::Ptr{Ptr{Cvoid}})::ze_result_t
end

@checked function zeModuleGetKernelNames(hModule, pCount, pNames)
    @ccall libze_loader.zeModuleGetKernelNames(hModule::ze_module_handle_t, pCount::Ptr{UInt32}, pNames::Ptr{Ptr{Cchar}})::ze_result_t
end

@cenum _ze_module_property_flag_t::UInt32 begin
    ZE_MODULE_PROPERTY_FLAG_IMPORTS = 1
    ZE_MODULE_PROPERTY_FLAG_FORCE_UINT32 = 2147483647
end

const ze_module_property_flag_t = _ze_module_property_flag_t

@checked function zeModuleGetProperties(hModule, pModuleProperties)
    @ccall libze_loader.zeModuleGetProperties(hModule::ze_module_handle_t, pModuleProperties::Ptr{ze_module_properties_t})::ze_result_t
end

# --- Kernels ------------------------------------------------------------

@cenum _ze_kernel_flag_t::UInt32 begin
    ZE_KERNEL_FLAG_FORCE_RESIDENCY = 1
    ZE_KERNEL_FLAG_EXPLICIT_RESIDENCY = 2
    ZE_KERNEL_FLAG_FORCE_UINT32 = 2147483647
end

const ze_kernel_flag_t = _ze_kernel_flag_t

@checked function zeKernelCreate(hModule, desc, phKernel)
    @ccall libze_loader.zeKernelCreate(hModule::ze_module_handle_t, desc::Ptr{ze_kernel_desc_t}, phKernel::Ptr{ze_kernel_handle_t})::ze_result_t
end

@checked function zeKernelDestroy(hKernel)
    @ccall libze_loader.zeKernelDestroy(hKernel::ze_kernel_handle_t)::ze_result_t
end

@checked function zeModuleGetFunctionPointer(hModule, pFunctionName, pfnFunction)
    @ccall libze_loader.zeModuleGetFunctionPointer(hModule::ze_module_handle_t, pFunctionName::Ptr{Cchar}, pfnFunction::Ptr{Ptr{Cvoid}})::ze_result_t
end

# --- Kernel group-size configuration -----------------------------------

@checked function zeKernelSetGroupSize(hKernel, groupSizeX, groupSizeY, groupSizeZ)
    @ccall libze_loader.zeKernelSetGroupSize(hKernel::ze_kernel_handle_t, groupSizeX::UInt32, groupSizeY::UInt32, groupSizeZ::UInt32)::ze_result_t
end

# Suggested group size is written to the three Ptr{UInt32} out-parameters.
@checked function zeKernelSuggestGroupSize(hKernel, globalSizeX, globalSizeY, globalSizeZ, groupSizeX, groupSizeY, groupSizeZ)
    @ccall libze_loader.zeKernelSuggestGroupSize(hKernel::ze_kernel_handle_t, globalSizeX::UInt32, globalSizeY::UInt32, globalSizeZ::UInt32, groupSizeX::Ptr{UInt32}, groupSizeY::Ptr{UInt32}, groupSizeZ::Ptr{UInt32})::ze_result_t
end

@checked function zeKernelSuggestMaxCooperativeGroupCount(hKernel, totalGroupCount)
    @ccall libze_loader.zeKernelSuggestMaxCooperativeGroupCount(hKernel::ze_kernel_handle_t, totalGroupCount::Ptr{UInt32})::ze_result_t
end

@checked function zeKernelSetArgumentValue(hKernel, argIndex, argSize, pArgValue)
    @ccall libze_loader.zeKernelSetArgumentValue(hKernel::ze_kernel_handle_t, argIndex::UInt32, argSize::Csize_t, pArgValue::Ptr{Cvoid})::ze_result_t
end

# --- Indirect access / cache configuration -----------------------------

const ze_kernel_indirect_access_flags_t = UInt32

@cenum _ze_kernel_indirect_access_flag_t::UInt32 begin
    ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST = 1
    ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE = 2
    ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED = 4
    ZE_KERNEL_INDIRECT_ACCESS_FLAG_FORCE_UINT32 = 2147483647
end

const ze_kernel_indirect_access_flag_t = _ze_kernel_indirect_access_flag_t

@checked function zeKernelSetIndirectAccess(hKernel, flags)
    @ccall libze_loader.zeKernelSetIndirectAccess(hKernel::ze_kernel_handle_t, flags::ze_kernel_indirect_access_flags_t)::ze_result_t
end

@checked function zeKernelGetIndirectAccess(hKernel, pFlags)
    @ccall libze_loader.zeKernelGetIndirectAccess(hKernel::ze_kernel_handle_t, pFlags::Ptr{ze_kernel_indirect_access_flags_t})::ze_result_t
end

@checked function zeKernelGetSourceAttributes(hKernel, pSize, pString)
    @ccall libze_loader.zeKernelGetSourceAttributes(hKernel::ze_kernel_handle_t, pSize::Ptr{UInt32}, pString::Ptr{Ptr{Cchar}})::ze_result_t
end

const ze_cache_config_flags_t = UInt32

@cenum _ze_cache_config_flag_t::UInt32 begin
    ZE_CACHE_CONFIG_FLAG_LARGE_SLM = 1
    ZE_CACHE_CONFIG_FLAG_LARGE_DATA = 2
    ZE_CACHE_CONFIG_FLAG_FORCE_UINT32 = 2147483647
end

const ze_cache_config_flag_t = _ze_cache_config_flag_t

@checked function zeKernelSetCacheConfig(hKernel, flags)
    @ccall libze_loader.zeKernelSetCacheConfig(hKernel::ze_kernel_handle_t, flags::ze_cache_config_flags_t)::ze_result_t
end

@checked function zeKernelGetProperties(hKernel, pKernelProperties)
    @ccall libze_loader.zeKernelGetProperties(hKernel::ze_kernel_handle_t, pKernelProperties::Ptr{ze_kernel_properties_t})::ze_result_t
end

@checked function zeKernelGetName(hKernel, pSize, pName)
    @ccall libze_loader.zeKernelGetName(hKernel::ze_kernel_handle_t, pSize::Ptr{Csize_t}, pName::Ptr{Cchar})::ze_result_t
end

# --- Kernel launches ----------------------------------------------------

@checked function zeCommandListAppendLaunchKernel(hCommandList, hKernel, pLaunchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents)
    @ccall libze_loader.zeCommandListAppendLaunchKernel(hCommandList::ze_command_list_handle_t, hKernel::ze_kernel_handle_t, pLaunchFuncArgs::Ptr{ze_group_count_t}, hSignalEvent::ze_event_handle_t, numWaitEvents::UInt32, phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t
end

@checked function zeCommandListAppendLaunchCooperativeKernel(hCommandList, hKernel, pLaunchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents)
    @ccall libze_loader.zeCommandListAppendLaunchCooperativeKernel(hCommandList::ze_command_list_handle_t, hKernel::ze_kernel_handle_t, pLaunchFuncArgs::Ptr{ze_group_count_t}, hSignalEvent::ze_event_handle_t, numWaitEvents::UInt32, phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t
end

# Launch arguments read from a device-resident buffer at dispatch time.
@checked function zeCommandListAppendLaunchKernelIndirect(hCommandList, hKernel, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents)
    @ccall libze_loader.zeCommandListAppendLaunchKernelIndirect(hCommandList::ze_command_list_handle_t, hKernel::ze_kernel_handle_t, pLaunchArgumentsBuffer::Ptr{ze_group_count_t}, hSignalEvent::ze_event_handle_t, numWaitEvents::UInt32, phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t
end

@checked function zeCommandListAppendLaunchMultipleKernelsIndirect(hCommandList, numKernels, phKernels, pCountBuffer, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents)
    @ccall libze_loader.zeCommandListAppendLaunchMultipleKernelsIndirect(hCommandList::ze_command_list_handle_t, numKernels::UInt32, phKernels::Ptr{ze_kernel_handle_t}, pCountBuffer::Ptr{UInt32}, pLaunchArgumentsBuffer::Ptr{ze_group_count_t}, hSignalEvent::ze_event_handle_t, numWaitEvents::UInt32, phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t
end

# --- Extension/experimental version enums ------------------------------
# 65536 == 0x10000, i.e. spec version 1.0 in the (major << 16 | minor) scheme.

@cenum _ze_module_program_exp_version_t::UInt32 begin
    ZE_MODULE_PROGRAM_EXP_VERSION_1_0 = 65536
    ZE_MODULE_PROGRAM_EXP_VERSION_CURRENT = 65536
    ZE_MODULE_PROGRAM_EXP_VERSION_FORCE_UINT32 = 2147483647
end

const ze_module_program_exp_version_t = _ze_module_program_exp_version_t

@cenum _ze_raytracing_ext_version_t::UInt32 begin
    ZE_RAYTRACING_EXT_VERSION_1_0 = 65536
    ZE_RAYTRACING_EXT_VERSION_CURRENT = 65536
    ZE_RAYTRACING_EXT_VERSION_FORCE_UINT32 = 2147483647
end

const ze_raytracing_ext_version_t = _ze_raytracing_ext_version_t

# Raytracing device flags (continues on the following source line).
@cenum _ze_device_raytracing_ext_flag_t::UInt32 begin
    ZE_DEVICE_RAYTRACING_EXT_FLAG_RAYQUERY = 1
    ZE_DEVICE_RAYTRACING_EXT_FLAG_FORCE_UINT32 =
2147483647 end

const ze_device_raytracing_ext_flag_t = _ze_device_raytracing_ext_flag_t

# NOTE(review): auto-generated checked wrappers forwarding to `libze_loader`
# via `@ccall`; see the oneAPI Level Zero spec for parameter semantics.

@cenum _ze_raytracing_mem_alloc_ext_flag_t::UInt32 begin
    ZE_RAYTRACING_MEM_ALLOC_EXT_FLAG_TBD = 1
    ZE_RAYTRACING_MEM_ALLOC_EXT_FLAG_FORCE_UINT32 = 2147483647
end

const ze_raytracing_mem_alloc_ext_flag_t = _ze_raytracing_mem_alloc_ext_flag_t

# --- Residency ----------------------------------------------------------

@checked function zeContextMakeMemoryResident(hContext, hDevice, ptr, size)
    @ccall libze_loader.zeContextMakeMemoryResident(hContext::ze_context_handle_t, hDevice::ze_device_handle_t, ptr::PtrOrZePtr{Cvoid}, size::Csize_t)::ze_result_t
end

@checked function zeContextEvictMemory(hContext, hDevice, ptr, size)
    @ccall libze_loader.zeContextEvictMemory(hContext::ze_context_handle_t, hDevice::ze_device_handle_t, ptr::PtrOrZePtr{Cvoid}, size::Csize_t)::ze_result_t
end

@checked function zeContextMakeImageResident(hContext, hDevice, hImage)
    @ccall libze_loader.zeContextMakeImageResident(hContext::ze_context_handle_t, hDevice::ze_device_handle_t, hImage::ze_image_handle_t)::ze_result_t
end

@checked function zeContextEvictImage(hContext, hDevice, hImage)
    @ccall libze_loader.zeContextEvictImage(hContext::ze_context_handle_t, hDevice::ze_device_handle_t, hImage::ze_image_handle_t)::ze_result_t
end

# --- Samplers -----------------------------------------------------------

@checked function zeSamplerCreate(hContext, hDevice, desc, phSampler)
    @ccall libze_loader.zeSamplerCreate(hContext::ze_context_handle_t, hDevice::ze_device_handle_t, desc::Ptr{ze_sampler_desc_t}, phSampler::Ptr{ze_sampler_handle_t})::ze_result_t
end

@checked function zeSamplerDestroy(hSampler)
    @ccall libze_loader.zeSamplerDestroy(hSampler::ze_sampler_handle_t)::ze_result_t
end

# --- Virtual / physical memory -----------------------------------------

@cenum _ze_memory_access_attribute_t::UInt32 begin
    ZE_MEMORY_ACCESS_ATTRIBUTE_NONE = 0
    ZE_MEMORY_ACCESS_ATTRIBUTE_READWRITE = 1
    ZE_MEMORY_ACCESS_ATTRIBUTE_READONLY = 2
    ZE_MEMORY_ACCESS_ATTRIBUTE_FORCE_UINT32 = 2147483647
end

const ze_memory_access_attribute_t = _ze_memory_access_attribute_t

@checked function zeVirtualMemReserve(hContext, pStart, size, pptr)
    @ccall libze_loader.zeVirtualMemReserve(hContext::ze_context_handle_t, pStart::Ptr{Cvoid}, size::Csize_t, pptr::Ptr{Ptr{Cvoid}})::ze_result_t
end

@checked function zeVirtualMemFree(hContext, ptr, size)
    @ccall libze_loader.zeVirtualMemFree(hContext::ze_context_handle_t, ptr::PtrOrZePtr{Cvoid}, size::Csize_t)::ze_result_t
end

@checked function zeVirtualMemQueryPageSize(hContext, hDevice, size, pagesize)
    @ccall libze_loader.zeVirtualMemQueryPageSize(hContext::ze_context_handle_t, hDevice::ze_device_handle_t, size::Csize_t, pagesize::Ptr{Csize_t})::ze_result_t
end

@cenum _ze_physical_mem_flag_t::UInt32 begin
    ZE_PHYSICAL_MEM_FLAG_ALLOCATE_ON_DEVICE = 1
    ZE_PHYSICAL_MEM_FLAG_ALLOCATE_ON_HOST = 2
    ZE_PHYSICAL_MEM_FLAG_FORCE_UINT32 = 2147483647
end

const ze_physical_mem_flag_t = _ze_physical_mem_flag_t

@checked function zePhysicalMemCreate(hContext, hDevice, desc, phPhysicalMemory)
    @ccall libze_loader.zePhysicalMemCreate(hContext::ze_context_handle_t, hDevice::ze_device_handle_t, desc::Ptr{ze_physical_mem_desc_t}, phPhysicalMemory::Ptr{ze_physical_mem_handle_t})::ze_result_t
end

@checked function zePhysicalMemDestroy(hContext, hPhysicalMemory)
    @ccall libze_loader.zePhysicalMemDestroy(hContext::ze_context_handle_t, hPhysicalMemory::ze_physical_mem_handle_t)::ze_result_t
end

# Map a reserved virtual range onto physical memory at `offset`.
@checked function zeVirtualMemMap(hContext, ptr, size, hPhysicalMemory, offset, access)
    @ccall libze_loader.zeVirtualMemMap(hContext::ze_context_handle_t, ptr::Ptr{Cvoid}, size::Csize_t, hPhysicalMemory::ze_physical_mem_handle_t, offset::Csize_t, access::ze_memory_access_attribute_t)::ze_result_t
end

@checked function zeVirtualMemUnmap(hContext, ptr, size)
    @ccall libze_loader.zeVirtualMemUnmap(hContext::ze_context_handle_t, ptr::Ptr{Cvoid}, size::Csize_t)::ze_result_t
end

@checked function zeVirtualMemSetAccessAttribute(hContext, ptr, size, access)
    @ccall libze_loader.zeVirtualMemSetAccessAttribute(hContext::ze_context_handle_t, ptr::Ptr{Cvoid}, size::Csize_t, access::ze_memory_access_attribute_t)::ze_result_t
end

@checked function zeVirtualMemGetAccessAttribute(hContext, ptr, size, access, outSize)
    @ccall libze_loader.zeVirtualMemGetAccessAttribute(hContext::ze_context_handle_t, ptr::Ptr{Cvoid}, size::Csize_t, access::Ptr{ze_memory_access_attribute_t}, outSize::Ptr{Csize_t})::ze_result_t
end

# --- Float atomics / global offset extensions --------------------------
# 65536 == 0x10000, i.e. extension version 1.0.

@cenum _ze_float_atomics_ext_version_t::UInt32 begin
    ZE_FLOAT_ATOMICS_EXT_VERSION_1_0 = 65536
    ZE_FLOAT_ATOMICS_EXT_VERSION_CURRENT = 65536
    ZE_FLOAT_ATOMICS_EXT_VERSION_FORCE_UINT32 = 2147483647
end

const ze_float_atomics_ext_version_t = _ze_float_atomics_ext_version_t

@cenum _ze_device_fp_atomic_ext_flag_t::UInt32 begin
    ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE = 1
    ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_ADD = 2
    ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX = 4
    ZE_DEVICE_FP_ATOMIC_EXT_FLAG_LOCAL_LOAD_STORE = 65536
    ZE_DEVICE_FP_ATOMIC_EXT_FLAG_LOCAL_ADD = 131072
    ZE_DEVICE_FP_ATOMIC_EXT_FLAG_LOCAL_MIN_MAX = 262144
    ZE_DEVICE_FP_ATOMIC_EXT_FLAG_FORCE_UINT32 = 2147483647
end

const ze_device_fp_atomic_ext_flag_t = _ze_device_fp_atomic_ext_flag_t

@cenum _ze_global_offset_exp_version_t::UInt32 begin
    ZE_GLOBAL_OFFSET_EXP_VERSION_1_0 = 65536
    ZE_GLOBAL_OFFSET_EXP_VERSION_CURRENT = 65536
    ZE_GLOBAL_OFFSET_EXP_VERSION_FORCE_UINT32 = 2147483647
end

const ze_global_offset_exp_version_t = _ze_global_offset_exp_version_t

@checked function zeKernelSetGlobalOffsetExp(hKernel, offsetX, offsetY, offsetZ)
    @ccall libze_loader.zeKernelSetGlobalOffsetExp(hKernel::ze_kernel_handle_t, offsetX::UInt32, offsetY::UInt32, offsetZ::UInt32)::ze_result_t
end

@cenum _ze_relaxed_allocation_limits_exp_version_t::UInt32 begin
    ZE_RELAXED_ALLOCATION_LIMITS_EXP_VERSION_1_0 = 65536
    ZE_RELAXED_ALLOCATION_LIMITS_EXP_VERSION_CURRENT = 65536
    ZE_RELAXED_ALLOCATION_LIMITS_EXP_VERSION_FORCE_UINT32 = 2147483647
end

const ze_relaxed_allocation_limits_exp_version_t = _ze_relaxed_allocation_limits_exp_version_t

# Relaxed-allocation-limit flags (continues on the following source line).
@cenum _ze_relaxed_allocation_limits_exp_flag_t::UInt32 begin
    ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE = 1
    ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_FORCE_UINT32 = 2147483647
end

const ze_relaxed_allocation_limits_exp_flag_t = _ze_relaxed_allocation_limits_exp_flag_t

# NOTE(review): auto-generated checked wrappers forwarding to `libze_loader`
# via `@ccall`; see the oneAPI Level Zero spec for parameter semantics.
# 65536 == 0x10000, i.e. extension version 1.0.

@cenum _ze_kernel_get_binary_exp_version_t::UInt32 begin
    ZE_KERNEL_GET_BINARY_EXP_VERSION_1_0 = 65536
    ZE_KERNEL_GET_BINARY_EXP_VERSION_CURRENT = 65536
    ZE_KERNEL_GET_BINARY_EXP_VERSION_FORCE_UINT32 = 2147483647
end

const ze_kernel_get_binary_exp_version_t = _ze_kernel_get_binary_exp_version_t

@checked function zeKernelGetBinaryExp(hKernel, pSize, pKernelBinary)
    @ccall libze_loader.zeKernelGetBinaryExp(hKernel::ze_kernel_handle_t, pSize::Ptr{Csize_t}, pKernelBinary::Ptr{UInt8})::ze_result_t
end

@cenum _ze_driver_ddi_handles_ext_version_t::UInt32 begin
    ZE_DRIVER_DDI_HANDLES_EXT_VERSION_1_0 = 65536
    ZE_DRIVER_DDI_HANDLES_EXT_VERSION_CURRENT = 65536
    ZE_DRIVER_DDI_HANDLES_EXT_VERSION_FORCE_UINT32 = 2147483647
end

const ze_driver_ddi_handles_ext_version_t = _ze_driver_ddi_handles_ext_version_t

@cenum _ze_driver_ddi_handle_ext_flag_t::UInt32 begin
    ZE_DRIVER_DDI_HANDLE_EXT_FLAG_DDI_HANDLE_EXT_SUPPORTED = 1
    ZE_DRIVER_DDI_HANDLE_EXT_FLAG_FORCE_UINT32 = 2147483647
end

const ze_driver_ddi_handle_ext_flag_t = _ze_driver_ddi_handle_ext_flag_t

# --- External semaphores (ext) -----------------------------------------

@cenum _ze_external_semaphore_ext_version_t::UInt32 begin
    ZE_EXTERNAL_SEMAPHORE_EXT_VERSION_1_0 = 65536
    ZE_EXTERNAL_SEMAPHORE_EXT_VERSION_CURRENT = 65536
    ZE_EXTERNAL_SEMAPHORE_EXT_VERSION_FORCE_UINT32 = 2147483647
end

const ze_external_semaphore_ext_version_t = _ze_external_semaphore_ext_version_t

# Opaque handle type: an empty mutable struct whose pointer stands in for
# the C handle (standard Clang.jl pattern for opaque C types).
mutable struct _ze_external_semaphore_ext_handle_t end

const ze_external_semaphore_ext_handle_t = Ptr{_ze_external_semaphore_ext_handle_t}

@cenum _ze_external_semaphore_ext_flag_t::UInt32 begin
    ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_OPAQUE_FD = 1
    ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_OPAQUE_WIN32 = 2
    ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_OPAQUE_WIN32_KMT = 4
    ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_D3D12_FENCE = 8
    ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_D3D11_FENCE = 16
    ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_KEYED_MUTEX = 32
    ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_KEYED_MUTEX_KMT = 64
    ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_VK_TIMELINE_SEMAPHORE_FD = 128
    ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_VK_TIMELINE_SEMAPHORE_WIN32 = 256
    ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_FORCE_UINT32 = 2147483647
end

const ze_external_semaphore_ext_flag_t = _ze_external_semaphore_ext_flag_t

@checked function zeDeviceImportExternalSemaphoreExt(hDevice, desc, phSemaphore)
    @ccall libze_loader.zeDeviceImportExternalSemaphoreExt(hDevice::ze_device_handle_t, desc::Ptr{ze_external_semaphore_ext_desc_t}, phSemaphore::Ptr{ze_external_semaphore_ext_handle_t})::ze_result_t
end

@checked function zeDeviceReleaseExternalSemaphoreExt(hSemaphore)
    @ccall libze_loader.zeDeviceReleaseExternalSemaphoreExt(hSemaphore::ze_external_semaphore_ext_handle_t)::ze_result_t
end

@checked function zeCommandListAppendSignalExternalSemaphoreExt(hCommandList, numSemaphores, phSemaphores, signalParams, hSignalEvent, numWaitEvents, phWaitEvents)
    @ccall libze_loader.zeCommandListAppendSignalExternalSemaphoreExt(hCommandList::ze_command_list_handle_t, numSemaphores::UInt32, phSemaphores::Ptr{ze_external_semaphore_ext_handle_t}, signalParams::Ptr{ze_external_semaphore_signal_params_ext_t}, hSignalEvent::ze_event_handle_t, numWaitEvents::UInt32, phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t
end

@checked function zeCommandListAppendWaitExternalSemaphoreExt(hCommandList, numSemaphores, phSemaphores, waitParams, hSignalEvent, numWaitEvents, phWaitEvents)
    @ccall libze_loader.zeCommandListAppendWaitExternalSemaphoreExt(hCommandList::ze_command_list_handle_t, numSemaphores::UInt32, phSemaphores::Ptr{ze_external_semaphore_ext_handle_t}, waitParams::Ptr{ze_external_semaphore_wait_params_ext_t}, hSignalEvent::ze_event_handle_t, numWaitEvents::UInt32, phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t
end

@cenum _ze_device_cache_line_size_ext_version_t::UInt32 begin
    ZE_DEVICE_CACHE_LINE_SIZE_EXT_VERSION_1_0 = 65536
    ZE_DEVICE_CACHE_LINE_SIZE_EXT_VERSION_CURRENT = 65536
    ZE_DEVICE_CACHE_LINE_SIZE_EXT_VERSION_FORCE_UINT32 = 2147483647
end

const ze_device_cache_line_size_ext_version_t = _ze_device_cache_line_size_ext_version_t

# --- Ray-tracing acceleration structure (RTAS, ext) --------------------

@cenum _ze_rtas_device_ext_flag_t::UInt32 begin
    ZE_RTAS_DEVICE_EXT_FLAG_RESERVED = 1
    ZE_RTAS_DEVICE_EXT_FLAG_FORCE_UINT32 = 2147483647
end

const ze_rtas_device_ext_flag_t = _ze_rtas_device_ext_flag_t

@cenum _ze_rtas_builder_ext_flag_t::UInt32 begin
    ZE_RTAS_BUILDER_EXT_FLAG_RESERVED = 1
    ZE_RTAS_BUILDER_EXT_FLAG_FORCE_UINT32 = 2147483647
end

const ze_rtas_builder_ext_flag_t = _ze_rtas_builder_ext_flag_t

@cenum _ze_rtas_parallel_operation_ext_flag_t::UInt32 begin
    ZE_RTAS_PARALLEL_OPERATION_EXT_FLAG_RESERVED = 1
    ZE_RTAS_PARALLEL_OPERATION_EXT_FLAG_FORCE_UINT32 = 2147483647
end

const ze_rtas_parallel_operation_ext_flag_t = _ze_rtas_parallel_operation_ext_flag_t

const ze_rtas_builder_geometry_ext_flags_t = UInt32

@cenum _ze_rtas_builder_geometry_ext_flag_t::UInt32 begin
    ZE_RTAS_BUILDER_GEOMETRY_EXT_FLAG_NON_OPAQUE = 1
    ZE_RTAS_BUILDER_GEOMETRY_EXT_FLAG_FORCE_UINT32 = 2147483647
end

const ze_rtas_builder_geometry_ext_flag_t = _ze_rtas_builder_geometry_ext_flag_t

const ze_rtas_builder_instance_ext_flags_t = UInt32

@cenum _ze_rtas_builder_instance_ext_flag_t::UInt32 begin
    ZE_RTAS_BUILDER_INSTANCE_EXT_FLAG_TRIANGLE_CULL_DISABLE = 1
    ZE_RTAS_BUILDER_INSTANCE_EXT_FLAG_TRIANGLE_FRONT_COUNTERCLOCKWISE = 2
    ZE_RTAS_BUILDER_INSTANCE_EXT_FLAG_TRIANGLE_FORCE_OPAQUE = 4
    ZE_RTAS_BUILDER_INSTANCE_EXT_FLAG_TRIANGLE_FORCE_NON_OPAQUE = 8
    ZE_RTAS_BUILDER_INSTANCE_EXT_FLAG_FORCE_UINT32 = 2147483647
end

const ze_rtas_builder_instance_ext_flag_t = _ze_rtas_builder_instance_ext_flag_t

@cenum _ze_rtas_builder_build_op_ext_flag_t::UInt32 begin
    ZE_RTAS_BUILDER_BUILD_OP_EXT_FLAG_COMPACT = 1
    ZE_RTAS_BUILDER_BUILD_OP_EXT_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION = 2
    ZE_RTAS_BUILDER_BUILD_OP_EXT_FLAG_FORCE_UINT32 = 2147483647
end

const ze_rtas_builder_build_op_ext_flag_t = _ze_rtas_builder_build_op_ext_flag_t

@cenum _ze_rtas_builder_geometry_type_ext_t::UInt32 begin
    ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXT_TRIANGLES = 0
    ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXT_QUADS = 1
    ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXT_PROCEDURAL = 2
    ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXT_INSTANCE = 3
    ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXT_FORCE_UINT32 = 2147483647
end

const ze_rtas_builder_geometry_type_ext_t = _ze_rtas_builder_geometry_type_ext_t

@cenum _ze_rtas_builder_input_data_format_ext_t::UInt32 begin
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_FLOAT3 = 0
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_FLOAT3X4_COLUMN_MAJOR = 1
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_FLOAT3X4_ALIGNED_COLUMN_MAJOR = 2
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_FLOAT3X4_ROW_MAJOR = 3
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_AABB = 4
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_TRIANGLE_INDICES_UINT32 = 5
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_QUAD_INDICES_UINT32 = 6
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_FORCE_UINT32 = 2147483647
end

const ze_rtas_builder_input_data_format_ext_t = _ze_rtas_builder_input_data_format_ext_t

# Opaque handle types (empty mutable structs; pointer stands in for C handle).
mutable struct _ze_rtas_builder_ext_handle_t end

const ze_rtas_builder_ext_handle_t = Ptr{_ze_rtas_builder_ext_handle_t}

mutable struct _ze_rtas_parallel_operation_ext_handle_t end

const ze_rtas_parallel_operation_ext_handle_t = Ptr{_ze_rtas_parallel_operation_ext_handle_t}

@checked function zeRTASBuilderCreateExt(hDriver, pDescriptor, phBuilder)
    @ccall libze_loader.zeRTASBuilderCreateExt(hDriver::ze_driver_handle_t, pDescriptor::Ptr{ze_rtas_builder_ext_desc_t}, phBuilder::Ptr{ze_rtas_builder_ext_handle_t})::ze_result_t
end

@checked function zeRTASBuilderGetBuildPropertiesExt(hBuilder, pBuildOpDescriptor, pProperties)
    @ccall libze_loader.zeRTASBuilderGetBuildPropertiesExt(hBuilder::ze_rtas_builder_ext_handle_t, pBuildOpDescriptor::Ptr{ze_rtas_builder_build_op_ext_desc_t}, pProperties::Ptr{ze_rtas_builder_ext_properties_t})::ze_result_t
end

@checked function zeDriverRTASFormatCompatibilityCheckExt(hDriver, rtasFormatA, rtasFormatB)
    @ccall libze_loader.zeDriverRTASFormatCompatibilityCheckExt(hDriver::ze_driver_handle_t, rtasFormatA::ze_rtas_format_ext_t, rtasFormatB::ze_rtas_format_ext_t)::ze_result_t
end

@checked function zeRTASBuilderBuildExt(hBuilder, pBuildOpDescriptor, pScratchBuffer, scratchBufferSizeBytes, pRtasBuffer, rtasBufferSizeBytes, hParallelOperation, pBuildUserPtr, pBounds, pRtasBufferSizeBytes)
    @ccall libze_loader.zeRTASBuilderBuildExt(hBuilder::ze_rtas_builder_ext_handle_t, pBuildOpDescriptor::Ptr{ze_rtas_builder_build_op_ext_desc_t}, pScratchBuffer::Ptr{Cvoid}, scratchBufferSizeBytes::Csize_t, pRtasBuffer::Ptr{Cvoid}, rtasBufferSizeBytes::Csize_t, hParallelOperation::ze_rtas_parallel_operation_ext_handle_t, pBuildUserPtr::Ptr{Cvoid}, pBounds::Ptr{ze_rtas_aabb_ext_t}, pRtasBufferSizeBytes::Ptr{Csize_t})::ze_result_t
end

@checked function zeRTASBuilderCommandListAppendCopyExt(hCommandList, dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents)
    @ccall libze_loader.zeRTASBuilderCommandListAppendCopyExt(hCommandList::ze_command_list_handle_t, dstptr::Ptr{Cvoid}, srcptr::Ptr{Cvoid}, size::Csize_t, hSignalEvent::ze_event_handle_t, numWaitEvents::UInt32, phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t
end

@checked function zeRTASBuilderDestroyExt(hBuilder)
    @ccall libze_loader.zeRTASBuilderDestroyExt(hBuilder::ze_rtas_builder_ext_handle_t)::ze_result_t
end

@checked function zeRTASParallelOperationCreateExt(hDriver, phParallelOperation)
    @ccall libze_loader.zeRTASParallelOperationCreateExt(hDriver::ze_driver_handle_t, phParallelOperation::Ptr{ze_rtas_parallel_operation_ext_handle_t})::ze_result_t
end

@checked function zeRTASParallelOperationGetPropertiesExt(hParallelOperation, pProperties)
    @ccall libze_loader.zeRTASParallelOperationGetPropertiesExt(hParallelOperation::ze_rtas_parallel_operation_ext_handle_t, pProperties::Ptr{ze_rtas_parallel_operation_ext_properties_t})::ze_result_t
end

# zeRTASParallelOperationJoinExt: ccall continues on the following source line.
@checked function zeRTASParallelOperationJoinExt(hParallelOperation)
    @ccall
libze_loader.zeRTASParallelOperationJoinExt(hParallelOperation::ze_rtas_parallel_operation_ext_handle_t)::ze_result_t end @checked function zeRTASParallelOperationDestroyExt(hParallelOperation) @ccall libze_loader.zeRTASParallelOperationDestroyExt(hParallelOperation::ze_rtas_parallel_operation_ext_handle_t)::ze_result_t end @cenum _ze_device_vector_sizes_ext_version_t::UInt32 begin ZE_DEVICE_VECTOR_SIZES_EXT_VERSION_1_0 = 65536 ZE_DEVICE_VECTOR_SIZES_EXT_VERSION_CURRENT = 65536 ZE_DEVICE_VECTOR_SIZES_EXT_VERSION_FORCE_UINT32 = 2147483647 end const ze_device_vector_sizes_ext_version_t = _ze_device_vector_sizes_ext_version_t @checked function zeDeviceGetVectorWidthPropertiesExt(hDevice, pCount, pVectorWidthProperties) @ccall libze_loader.zeDeviceGetVectorWidthPropertiesExt(hDevice::ze_device_handle_t, pCount::Ptr{UInt32}, pVectorWidthProperties::Ptr{ze_device_vector_width_properties_ext_t})::ze_result_t end @cenum _ze_cache_reservation_ext_version_t::UInt32 begin ZE_CACHE_RESERVATION_EXT_VERSION_1_0 = 65536 ZE_CACHE_RESERVATION_EXT_VERSION_CURRENT = 65536 ZE_CACHE_RESERVATION_EXT_VERSION_FORCE_UINT32 = 2147483647 end const ze_cache_reservation_ext_version_t = _ze_cache_reservation_ext_version_t @cenum _ze_cache_ext_region_t::UInt32 begin ZE_CACHE_EXT_REGION_ZE_CACHE_REGION_DEFAULT = 0 ZE_CACHE_EXT_REGION_ZE_CACHE_RESERVE_REGION = 1 ZE_CACHE_EXT_REGION_ZE_CACHE_NON_RESERVED_REGION = 2 ZE_CACHE_EXT_REGION_DEFAULT = 0 ZE_CACHE_EXT_REGION_RESERVED = 1 ZE_CACHE_EXT_REGION_NON_RESERVED = 2 ZE_CACHE_EXT_REGION_FORCE_UINT32 = 2147483647 end const ze_cache_ext_region_t = _ze_cache_ext_region_t @checked function zeDeviceReserveCacheExt(hDevice, cacheLevel, cacheReservationSize) @ccall libze_loader.zeDeviceReserveCacheExt(hDevice::ze_device_handle_t, cacheLevel::Csize_t, cacheReservationSize::Csize_t)::ze_result_t end @checked function zeDeviceSetCacheAdviceExt(hDevice, ptr, regionSize, cacheRegion) @ccall libze_loader.zeDeviceSetCacheAdviceExt(hDevice::ze_device_handle_t, 
ptr::Ptr{Cvoid}, regionSize::Csize_t, cacheRegion::ze_cache_ext_region_t)::ze_result_t end @cenum _ze_event_query_timestamps_exp_version_t::UInt32 begin ZE_EVENT_QUERY_TIMESTAMPS_EXP_VERSION_1_0 = 65536 ZE_EVENT_QUERY_TIMESTAMPS_EXP_VERSION_CURRENT = 65536 ZE_EVENT_QUERY_TIMESTAMPS_EXP_VERSION_FORCE_UINT32 = 2147483647 end const ze_event_query_timestamps_exp_version_t = _ze_event_query_timestamps_exp_version_t @checked function zeEventQueryTimestampsExp(hEvent, hDevice, pCount, pTimestamps) @ccall libze_loader.zeEventQueryTimestampsExp(hEvent::ze_event_handle_t, hDevice::ze_device_handle_t, pCount::Ptr{UInt32}, pTimestamps::Ptr{ze_kernel_timestamp_result_t})::ze_result_t end @cenum _ze_image_memory_properties_exp_version_t::UInt32 begin ZE_IMAGE_MEMORY_PROPERTIES_EXP_VERSION_1_0 = 65536 ZE_IMAGE_MEMORY_PROPERTIES_EXP_VERSION_CURRENT = 65536 ZE_IMAGE_MEMORY_PROPERTIES_EXP_VERSION_FORCE_UINT32 = 2147483647 end const ze_image_memory_properties_exp_version_t = _ze_image_memory_properties_exp_version_t @checked function zeImageGetMemoryPropertiesExp(hImage, pMemoryProperties) @ccall libze_loader.zeImageGetMemoryPropertiesExp(hImage::ze_image_handle_t, pMemoryProperties::Ptr{ze_image_memory_properties_exp_t})::ze_result_t end @cenum _ze_image_view_ext_version_t::UInt32 begin ZE_IMAGE_VIEW_EXT_VERSION_1_0 = 65536 ZE_IMAGE_VIEW_EXT_VERSION_CURRENT = 65536 ZE_IMAGE_VIEW_EXT_VERSION_FORCE_UINT32 = 2147483647 end const ze_image_view_ext_version_t = _ze_image_view_ext_version_t @checked function zeImageViewCreateExt(hContext, hDevice, desc, hImage, phImageView) @ccall libze_loader.zeImageViewCreateExt(hContext::ze_context_handle_t, hDevice::ze_device_handle_t, desc::Ptr{ze_image_desc_t}, hImage::ze_image_handle_t, phImageView::Ptr{ze_image_handle_t})::ze_result_t end @cenum _ze_image_view_exp_version_t::UInt32 begin ZE_IMAGE_VIEW_EXP_VERSION_1_0 = 65536 ZE_IMAGE_VIEW_EXP_VERSION_CURRENT = 65536 ZE_IMAGE_VIEW_EXP_VERSION_FORCE_UINT32 = 2147483647 end const 
ze_image_view_exp_version_t = _ze_image_view_exp_version_t @checked function zeImageViewCreateExp(hContext, hDevice, desc, hImage, phImageView) @ccall libze_loader.zeImageViewCreateExp(hContext::ze_context_handle_t, hDevice::ze_device_handle_t, desc::Ptr{ze_image_desc_t}, hImage::ze_image_handle_t, phImageView::Ptr{ze_image_handle_t})::ze_result_t end @cenum _ze_image_view_planar_ext_version_t::UInt32 begin ZE_IMAGE_VIEW_PLANAR_EXT_VERSION_1_0 = 65536 ZE_IMAGE_VIEW_PLANAR_EXT_VERSION_CURRENT = 65536 ZE_IMAGE_VIEW_PLANAR_EXT_VERSION_FORCE_UINT32 = 2147483647 end const ze_image_view_planar_ext_version_t = _ze_image_view_planar_ext_version_t @cenum _ze_image_view_planar_exp_version_t::UInt32 begin ZE_IMAGE_VIEW_PLANAR_EXP_VERSION_1_0 = 65536 ZE_IMAGE_VIEW_PLANAR_EXP_VERSION_CURRENT = 65536 ZE_IMAGE_VIEW_PLANAR_EXP_VERSION_FORCE_UINT32 = 2147483647 end const ze_image_view_planar_exp_version_t = _ze_image_view_planar_exp_version_t @cenum _ze_scheduling_hints_exp_version_t::UInt32 begin ZE_SCHEDULING_HINTS_EXP_VERSION_1_0 = 65536 ZE_SCHEDULING_HINTS_EXP_VERSION_CURRENT = 65536 ZE_SCHEDULING_HINTS_EXP_VERSION_FORCE_UINT32 = 2147483647 end const ze_scheduling_hints_exp_version_t = _ze_scheduling_hints_exp_version_t @cenum _ze_scheduling_hint_exp_flag_t::UInt32 begin ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST = 1 ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN = 2 ZE_SCHEDULING_HINT_EXP_FLAG_STALL_BASED_ROUND_ROBIN = 4 ZE_SCHEDULING_HINT_EXP_FLAG_FORCE_UINT32 = 2147483647 end const ze_scheduling_hint_exp_flag_t = _ze_scheduling_hint_exp_flag_t @checked function zeKernelSchedulingHintExp(hKernel, pHint) @ccall libze_loader.zeKernelSchedulingHintExp(hKernel::ze_kernel_handle_t, pHint::Ptr{ze_scheduling_hint_exp_desc_t})::ze_result_t end @cenum _ze_linkonce_odr_ext_version_t::UInt32 begin ZE_LINKONCE_ODR_EXT_VERSION_1_0 = 65536 ZE_LINKONCE_ODR_EXT_VERSION_CURRENT = 65536 ZE_LINKONCE_ODR_EXT_VERSION_FORCE_UINT32 = 2147483647 end const ze_linkonce_odr_ext_version_t = 
_ze_linkonce_odr_ext_version_t @cenum _ze_power_saving_hint_exp_version_t::UInt32 begin ZE_POWER_SAVING_HINT_EXP_VERSION_1_0 = 65536 ZE_POWER_SAVING_HINT_EXP_VERSION_CURRENT = 65536 ZE_POWER_SAVING_HINT_EXP_VERSION_FORCE_UINT32 = 2147483647 end const ze_power_saving_hint_exp_version_t = _ze_power_saving_hint_exp_version_t @cenum _ze_power_saving_hint_type_t::UInt32 begin ZE_POWER_SAVING_HINT_TYPE_MIN = 0 ZE_POWER_SAVING_HINT_TYPE_MAX = 100 ZE_POWER_SAVING_HINT_TYPE_FORCE_UINT32 = 2147483647 end const ze_power_saving_hint_type_t = _ze_power_saving_hint_type_t @cenum _ze_subgroup_ext_version_t::UInt32 begin ZE_SUBGROUP_EXT_VERSION_1_0 = 65536 ZE_SUBGROUP_EXT_VERSION_CURRENT = 65536 ZE_SUBGROUP_EXT_VERSION_FORCE_UINT32 = 2147483647 end const ze_subgroup_ext_version_t = _ze_subgroup_ext_version_t @cenum _ze_eu_count_ext_version_t::UInt32 begin ZE_EU_COUNT_EXT_VERSION_1_0 = 65536 ZE_EU_COUNT_EXT_VERSION_CURRENT = 65536 ZE_EU_COUNT_EXT_VERSION_FORCE_UINT32 = 2147483647 end const ze_eu_count_ext_version_t = _ze_eu_count_ext_version_t @cenum _ze_pci_properties_ext_version_t::UInt32 begin ZE_PCI_PROPERTIES_EXT_VERSION_1_0 = 65536 ZE_PCI_PROPERTIES_EXT_VERSION_CURRENT = 65536 ZE_PCI_PROPERTIES_EXT_VERSION_FORCE_UINT32 = 2147483647 end const ze_pci_properties_ext_version_t = _ze_pci_properties_ext_version_t @checked function zeDevicePciGetPropertiesExt(hDevice, pPciProperties) @ccall libze_loader.zeDevicePciGetPropertiesExt(hDevice::ze_device_handle_t, pPciProperties::Ptr{ze_pci_ext_properties_t})::ze_result_t end @cenum _ze_srgb_ext_version_t::UInt32 begin ZE_SRGB_EXT_VERSION_1_0 = 65536 ZE_SRGB_EXT_VERSION_CURRENT = 65536 ZE_SRGB_EXT_VERSION_FORCE_UINT32 = 2147483647 end const ze_srgb_ext_version_t = _ze_srgb_ext_version_t @cenum _ze_image_copy_ext_version_t::UInt32 begin ZE_IMAGE_COPY_EXT_VERSION_1_0 = 65536 ZE_IMAGE_COPY_EXT_VERSION_CURRENT = 65536 ZE_IMAGE_COPY_EXT_VERSION_FORCE_UINT32 = 2147483647 end const ze_image_copy_ext_version_t = _ze_image_copy_ext_version_t 
# NOTE(review): auto-generated Level Zero bindings; `@checked` wrappers forward
# to the same-named symbols in `libze_loader` and return `ze_result_t`.

# Copy an image region to linear memory with explicit row/slice pitches.
@checked function zeCommandListAppendImageCopyToMemoryExt(hCommandList, dstptr, hSrcImage,
                                                          pSrcRegion, destRowPitch,
                                                          destSlicePitch, hSignalEvent,
                                                          numWaitEvents, phWaitEvents)
    @ccall libze_loader.zeCommandListAppendImageCopyToMemoryExt(
        hCommandList::ze_command_list_handle_t,
        dstptr::Ptr{Cvoid},
        hSrcImage::ze_image_handle_t,
        pSrcRegion::Ptr{ze_image_region_t},
        destRowPitch::UInt32,
        destSlicePitch::UInt32,
        hSignalEvent::ze_event_handle_t,
        numWaitEvents::UInt32,
        phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t
end

# Inverse direction: linear memory into an image region.
@checked function zeCommandListAppendImageCopyFromMemoryExt(hCommandList, hDstImage, srcptr,
                                                            pDstRegion, srcRowPitch,
                                                            srcSlicePitch, hSignalEvent,
                                                            numWaitEvents, phWaitEvents)
    @ccall libze_loader.zeCommandListAppendImageCopyFromMemoryExt(
        hCommandList::ze_command_list_handle_t,
        hDstImage::ze_image_handle_t,
        srcptr::Ptr{Cvoid},
        pDstRegion::Ptr{ze_image_region_t},
        srcRowPitch::UInt32,
        srcSlicePitch::UInt32,
        hSignalEvent::ze_event_handle_t,
        numWaitEvents::UInt32,
        phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t
end

@cenum _ze_image_query_alloc_properties_ext_version_t::UInt32 begin
    ZE_IMAGE_QUERY_ALLOC_PROPERTIES_EXT_VERSION_1_0 = 65536
    ZE_IMAGE_QUERY_ALLOC_PROPERTIES_EXT_VERSION_CURRENT = 65536
    ZE_IMAGE_QUERY_ALLOC_PROPERTIES_EXT_VERSION_FORCE_UINT32 = 2147483647
end

const ze_image_query_alloc_properties_ext_version_t = _ze_image_query_alloc_properties_ext_version_t

@checked function zeImageGetAllocPropertiesExt(hContext, hImage, pImageAllocProperties)
    @ccall libze_loader.zeImageGetAllocPropertiesExt(
        hContext::ze_context_handle_t,
        hImage::ze_image_handle_t,
        pImageAllocProperties::Ptr{ze_image_allocation_ext_properties_t})::ze_result_t
end

# --- module linkage inspection extension ---

@cenum _ze_linkage_inspection_ext_version_t::UInt32 begin
    ZE_LINKAGE_INSPECTION_EXT_VERSION_1_0 = 65536
    ZE_LINKAGE_INSPECTION_EXT_VERSION_CURRENT = 65536
    ZE_LINKAGE_INSPECTION_EXT_VERSION_FORCE_UINT32 = 2147483647
end

const ze_linkage_inspection_ext_version_t = _ze_linkage_inspection_ext_version_t

@cenum _ze_linkage_inspection_ext_flag_t::UInt32 begin
    ZE_LINKAGE_INSPECTION_EXT_FLAG_IMPORTS = 1
    ZE_LINKAGE_INSPECTION_EXT_FLAG_UNRESOLVABLE_IMPORTS = 2
    ZE_LINKAGE_INSPECTION_EXT_FLAG_EXPORTS = 4
    ZE_LINKAGE_INSPECTION_EXT_FLAG_FORCE_UINT32 = 2147483647
end

const ze_linkage_inspection_ext_flag_t = _ze_linkage_inspection_ext_flag_t

# Inspect linkage across `numModules` modules; results come back as a build log.
@checked function zeModuleInspectLinkageExt(pInspectDesc, numModules, phModules, phLog)
    @ccall libze_loader.zeModuleInspectLinkageExt(
        pInspectDesc::Ptr{ze_linkage_inspection_ext_desc_t},
        numModules::UInt32,
        phModules::Ptr{ze_module_handle_t},
        phLog::Ptr{ze_module_build_log_handle_t})::ze_result_t
end

# --- memory compression hints extension ---

@cenum _ze_memory_compression_hints_ext_version_t::UInt32 begin
    ZE_MEMORY_COMPRESSION_HINTS_EXT_VERSION_1_0 = 65536
    ZE_MEMORY_COMPRESSION_HINTS_EXT_VERSION_CURRENT = 65536
    ZE_MEMORY_COMPRESSION_HINTS_EXT_VERSION_FORCE_UINT32 = 2147483647
end

const ze_memory_compression_hints_ext_version_t = _ze_memory_compression_hints_ext_version_t

@cenum _ze_memory_compression_hints_ext_flag_t::UInt32 begin
    ZE_MEMORY_COMPRESSION_HINTS_EXT_FLAG_COMPRESSED = 1
    ZE_MEMORY_COMPRESSION_HINTS_EXT_FLAG_UNCOMPRESSED = 2
    ZE_MEMORY_COMPRESSION_HINTS_EXT_FLAG_FORCE_UINT32 = 2147483647
end

const ze_memory_compression_hints_ext_flag_t = _ze_memory_compression_hints_ext_flag_t

# --- memory free policies extension ---

@cenum _ze_memory_free_policies_ext_version_t::UInt32 begin
    ZE_MEMORY_FREE_POLICIES_EXT_VERSION_1_0 = 65536
    ZE_MEMORY_FREE_POLICIES_EXT_VERSION_CURRENT = 65536
    ZE_MEMORY_FREE_POLICIES_EXT_VERSION_FORCE_UINT32 = 2147483647
end

const ze_memory_free_policies_ext_version_t = _ze_memory_free_policies_ext_version_t

@cenum _ze_driver_memory_free_policy_ext_flag_t::UInt32 begin
    ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_BLOCKING_FREE = 1
    ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_DEFER_FREE = 2
    ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_FORCE_UINT32 = 2147483647
end

const ze_driver_memory_free_policy_ext_flag_t = _ze_driver_memory_free_policy_ext_flag_t

# Free with an explicit policy. `PtrOrZePtr` is a project-defined pointer union
# (presumably accepting both host and Level-Zero pointers) — defined elsewhere.
@checked function zeMemFreeExt(hContext, pMemFreeDesc, ptr)
    @ccall libze_loader.zeMemFreeExt(
        hContext::ze_context_handle_t,
        pMemFreeDesc::Ptr{ze_memory_free_ext_desc_t},
        ptr::PtrOrZePtr{Cvoid})::ze_result_t
end

@cenum _ze_device_luid_ext_version_t::UInt32 begin
    ZE_DEVICE_LUID_EXT_VERSION_1_0 = 65536
    ZE_DEVICE_LUID_EXT_VERSION_CURRENT = 65536
    ZE_DEVICE_LUID_EXT_VERSION_FORCE_UINT32 = 2147483647
end

const ze_device_luid_ext_version_t = _ze_device_luid_ext_version_t

# --- fabric topology (experimental): vertices are devices/sub-devices, edges
# are the links between them; queries follow the in/out `pCount` pattern ---

@checked function zeFabricVertexGetExp(hDriver, pCount, phVertices)
    @ccall libze_loader.zeFabricVertexGetExp(
        hDriver::ze_driver_handle_t,
        pCount::Ptr{UInt32},
        phVertices::Ptr{ze_fabric_vertex_handle_t})::ze_result_t
end

@checked function zeFabricVertexGetSubVerticesExp(hVertex, pCount, phSubvertices)
    @ccall libze_loader.zeFabricVertexGetSubVerticesExp(
        hVertex::ze_fabric_vertex_handle_t,
        pCount::Ptr{UInt32},
        phSubvertices::Ptr{ze_fabric_vertex_handle_t})::ze_result_t
end

@checked function zeFabricVertexGetPropertiesExp(hVertex, pVertexProperties)
    @ccall libze_loader.zeFabricVertexGetPropertiesExp(
        hVertex::ze_fabric_vertex_handle_t,
        pVertexProperties::Ptr{ze_fabric_vertex_exp_properties_t})::ze_result_t
end

@checked function zeFabricVertexGetDeviceExp(hVertex, phDevice)
    @ccall libze_loader.zeFabricVertexGetDeviceExp(
        hVertex::ze_fabric_vertex_handle_t,
        phDevice::Ptr{ze_device_handle_t})::ze_result_t
end

@checked function zeDeviceGetFabricVertexExp(hDevice, phVertex)
    @ccall libze_loader.zeDeviceGetFabricVertexExp(
        hDevice::ze_device_handle_t,
        phVertex::Ptr{ze_fabric_vertex_handle_t})::ze_result_t
end

@checked function zeFabricEdgeGetExp(hVertexA, hVertexB, pCount, phEdges)
    @ccall libze_loader.zeFabricEdgeGetExp(
        hVertexA::ze_fabric_vertex_handle_t,
        hVertexB::ze_fabric_vertex_handle_t,
        pCount::Ptr{UInt32},
        phEdges::Ptr{ze_fabric_edge_handle_t})::ze_result_t
end

@checked function zeFabricEdgeGetVerticesExp(hEdge, phVertexA, phVertexB)
    @ccall libze_loader.zeFabricEdgeGetVerticesExp(
        hEdge::ze_fabric_edge_handle_t,
        phVertexA::Ptr{ze_fabric_vertex_handle_t},
        phVertexB::Ptr{ze_fabric_vertex_handle_t})::ze_result_t
end

@checked function zeFabricEdgeGetPropertiesExp(hEdge, pEdgeProperties)
    @ccall libze_loader.zeFabricEdgeGetPropertiesExp(
        hEdge::ze_fabric_edge_handle_t,
        pEdgeProperties::Ptr{ze_fabric_edge_exp_properties_t})::ze_result_t
end

# --- assorted version-only extensions ---

@cenum _ze_device_memory_properties_ext_version_t::UInt32 begin
    ZE_DEVICE_MEMORY_PROPERTIES_EXT_VERSION_1_0 = 65536
    ZE_DEVICE_MEMORY_PROPERTIES_EXT_VERSION_CURRENT = 65536
    ZE_DEVICE_MEMORY_PROPERTIES_EXT_VERSION_FORCE_UINT32 = 2147483647
end

const ze_device_memory_properties_ext_version_t = _ze_device_memory_properties_ext_version_t

@cenum _ze_bfloat16_conversions_ext_version_t::UInt32 begin
    ZE_BFLOAT16_CONVERSIONS_EXT_VERSION_1_0 = 65536
    ZE_BFLOAT16_CONVERSIONS_EXT_VERSION_CURRENT = 65536
    ZE_BFLOAT16_CONVERSIONS_EXT_VERSION_FORCE_UINT32 = 2147483647
end

const ze_bfloat16_conversions_ext_version_t = _ze_bfloat16_conversions_ext_version_t

@cenum _ze_device_ip_version_version_t::UInt32 begin
    ZE_DEVICE_IP_VERSION_VERSION_1_0 = 65536
    ZE_DEVICE_IP_VERSION_VERSION_CURRENT = 65536
    ZE_DEVICE_IP_VERSION_VERSION_FORCE_UINT32 = 2147483647
end

const ze_device_ip_version_version_t = _ze_device_ip_version_version_t

@cenum _ze_kernel_max_group_size_properties_ext_version_t::UInt32 begin
    ZE_KERNEL_MAX_GROUP_SIZE_PROPERTIES_EXT_VERSION_1_0 = 65536
    ZE_KERNEL_MAX_GROUP_SIZE_PROPERTIES_EXT_VERSION_CURRENT = 65536
    ZE_KERNEL_MAX_GROUP_SIZE_PROPERTIES_EXT_VERSION_FORCE_UINT32 = 2147483647
end

const ze_kernel_max_group_size_properties_ext_version_t = _ze_kernel_max_group_size_properties_ext_version_t

# Alternate spelling kept for API compatibility.
const ze_kernel_max_group_size_ext_properties_t = ze_kernel_max_group_size_properties_ext_t

@cenum _ze_sub_allocations_exp_version_t::UInt32 begin
    ZE_SUB_ALLOCATIONS_EXP_VERSION_1_0 = 65536
    ZE_SUB_ALLOCATIONS_EXP_VERSION_CURRENT = 65536
    ZE_SUB_ALLOCATIONS_EXP_VERSION_FORCE_UINT32 = 2147483647
end

const ze_sub_allocations_exp_version_t = _ze_sub_allocations_exp_version_t

# --- kernel timestamp query (ext) ---

@cenum _ze_event_query_kernel_timestamps_ext_version_t::UInt32 begin
    ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_VERSION_1_0 = 65536
    ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_VERSION_CURRENT = 65536
    ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_VERSION_FORCE_UINT32 = 2147483647
end

const ze_event_query_kernel_timestamps_ext_version_t = _ze_event_query_kernel_timestamps_ext_version_t

@cenum _ze_event_query_kernel_timestamps_ext_flag_t::UInt32 begin
    ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_FLAG_KERNEL = 1
    ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_FLAG_SYNCHRONIZED = 2
    ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_FLAG_FORCE_UINT32 = 2147483647
end

const ze_event_query_kernel_timestamps_ext_flag_t = _ze_event_query_kernel_timestamps_ext_flag_t

@checked function zeEventQueryKernelTimestampsExt(hEvent, hDevice, pCount, pResults)
    @ccall libze_loader.zeEventQueryKernelTimestampsExt(
        hEvent::ze_event_handle_t,
        hDevice::ze_device_handle_t,
        pCount::Ptr{UInt32},
        pResults::Ptr{ze_event_query_kernel_timestamps_results_ext_properties_t})::ze_result_t
end

# --- RTAS builder, experimental `exp` variant (mirrors the `ext` API above
# with distinct handle/descriptor types) ---

@cenum _ze_rtas_device_exp_flag_t::UInt32 begin
    ZE_RTAS_DEVICE_EXP_FLAG_RESERVED = 1
    ZE_RTAS_DEVICE_EXP_FLAG_FORCE_UINT32 = 2147483647
end

const ze_rtas_device_exp_flag_t = _ze_rtas_device_exp_flag_t

@cenum _ze_rtas_builder_exp_flag_t::UInt32 begin
    ZE_RTAS_BUILDER_EXP_FLAG_RESERVED = 1
    ZE_RTAS_BUILDER_EXP_FLAG_FORCE_UINT32 = 2147483647
end

const ze_rtas_builder_exp_flag_t = _ze_rtas_builder_exp_flag_t

@cenum _ze_rtas_parallel_operation_exp_flag_t::UInt32 begin
    ZE_RTAS_PARALLEL_OPERATION_EXP_FLAG_RESERVED = 1
    ZE_RTAS_PARALLEL_OPERATION_EXP_FLAG_FORCE_UINT32 = 2147483647
end

const ze_rtas_parallel_operation_exp_flag_t = _ze_rtas_parallel_operation_exp_flag_t

const ze_rtas_builder_geometry_exp_flags_t = UInt32

@cenum _ze_rtas_builder_geometry_exp_flag_t::UInt32 begin
    ZE_RTAS_BUILDER_GEOMETRY_EXP_FLAG_NON_OPAQUE = 1
    ZE_RTAS_BUILDER_GEOMETRY_EXP_FLAG_FORCE_UINT32 = 2147483647
end

const ze_rtas_builder_geometry_exp_flag_t = _ze_rtas_builder_geometry_exp_flag_t

const ze_rtas_builder_instance_exp_flags_t = UInt32

@cenum _ze_rtas_builder_instance_exp_flag_t::UInt32 begin
    ZE_RTAS_BUILDER_INSTANCE_EXP_FLAG_TRIANGLE_CULL_DISABLE = 1
    ZE_RTAS_BUILDER_INSTANCE_EXP_FLAG_TRIANGLE_FRONT_COUNTERCLOCKWISE = 2
    ZE_RTAS_BUILDER_INSTANCE_EXP_FLAG_TRIANGLE_FORCE_OPAQUE = 4
    ZE_RTAS_BUILDER_INSTANCE_EXP_FLAG_TRIANGLE_FORCE_NON_OPAQUE = 8
    ZE_RTAS_BUILDER_INSTANCE_EXP_FLAG_FORCE_UINT32 = 2147483647
end

const ze_rtas_builder_instance_exp_flag_t = _ze_rtas_builder_instance_exp_flag_t

@cenum _ze_rtas_builder_build_op_exp_flag_t::UInt32 begin
    ZE_RTAS_BUILDER_BUILD_OP_EXP_FLAG_COMPACT = 1
    ZE_RTAS_BUILDER_BUILD_OP_EXP_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION = 2
    ZE_RTAS_BUILDER_BUILD_OP_EXP_FLAG_FORCE_UINT32 = 2147483647
end

const ze_rtas_builder_build_op_exp_flag_t = _ze_rtas_builder_build_op_exp_flag_t

@cenum _ze_rtas_builder_geometry_type_exp_t::UInt32 begin
    ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_TRIANGLES = 0
    ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_QUADS = 1
    ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_PROCEDURAL = 2
    ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_INSTANCE = 3
    ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXP_FORCE_UINT32 = 2147483647
end

const ze_rtas_builder_geometry_type_exp_t = _ze_rtas_builder_geometry_type_exp_t

@cenum _ze_rtas_builder_input_data_format_exp_t::UInt32 begin
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3 = 0
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_COLUMN_MAJOR = 1
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ALIGNED_COLUMN_MAJOR = 2
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FLOAT3X4_ROW_MAJOR = 3
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_AABB = 4
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_TRIANGLE_INDICES_UINT32 = 5
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_QUAD_INDICES_UINT32 = 6
    ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXP_FORCE_UINT32 = 2147483647
end

const ze_rtas_builder_input_data_format_exp_t = _ze_rtas_builder_input_data_format_exp_t

# Opaque handle tag types, as in the `ext` variant.
mutable struct _ze_rtas_builder_exp_handle_t end

const ze_rtas_builder_exp_handle_t = Ptr{_ze_rtas_builder_exp_handle_t}

mutable struct _ze_rtas_parallel_operation_exp_handle_t end

const ze_rtas_parallel_operation_exp_handle_t = Ptr{_ze_rtas_parallel_operation_exp_handle_t}

@checked function zeRTASBuilderCreateExp(hDriver, pDescriptor, phBuilder)
    @ccall libze_loader.zeRTASBuilderCreateExp(
        hDriver::ze_driver_handle_t,
        pDescriptor::Ptr{ze_rtas_builder_exp_desc_t},
        phBuilder::Ptr{ze_rtas_builder_exp_handle_t})::ze_result_t
end

@checked function zeRTASBuilderGetBuildPropertiesExp(hBuilder, pBuildOpDescriptor, pProperties)
    @ccall libze_loader.zeRTASBuilderGetBuildPropertiesExp(
        hBuilder::ze_rtas_builder_exp_handle_t,
        pBuildOpDescriptor::Ptr{ze_rtas_builder_build_op_exp_desc_t},
        pProperties::Ptr{ze_rtas_builder_exp_properties_t})::ze_result_t
end

@checked function zeDriverRTASFormatCompatibilityCheckExp(hDriver, rtasFormatA, rtasFormatB)
    @ccall libze_loader.zeDriverRTASFormatCompatibilityCheckExp(
        hDriver::ze_driver_handle_t,
        rtasFormatA::ze_rtas_format_exp_t,
        rtasFormatB::ze_rtas_format_exp_t)::ze_result_t
end

@checked function zeRTASBuilderBuildExp(hBuilder, pBuildOpDescriptor, pScratchBuffer,
                                        scratchBufferSizeBytes, pRtasBuffer,
                                        rtasBufferSizeBytes, hParallelOperation,
                                        pBuildUserPtr, pBounds, pRtasBufferSizeBytes)
    @ccall libze_loader.zeRTASBuilderBuildExp(
        hBuilder::ze_rtas_builder_exp_handle_t,
        pBuildOpDescriptor::Ptr{ze_rtas_builder_build_op_exp_desc_t},
        pScratchBuffer::Ptr{Cvoid},
        scratchBufferSizeBytes::Csize_t,
        pRtasBuffer::Ptr{Cvoid},
        rtasBufferSizeBytes::Csize_t,
        hParallelOperation::ze_rtas_parallel_operation_exp_handle_t,
        pBuildUserPtr::Ptr{Cvoid},
        pBounds::Ptr{ze_rtas_aabb_exp_t},
        pRtasBufferSizeBytes::Ptr{Csize_t})::ze_result_t
end

@checked function zeRTASBuilderDestroyExp(hBuilder)
    @ccall libze_loader.zeRTASBuilderDestroyExp(
        hBuilder::ze_rtas_builder_exp_handle_t)::ze_result_t
end

@checked function zeRTASParallelOperationCreateExp(hDriver, phParallelOperation)
    @ccall libze_loader.zeRTASParallelOperationCreateExp(
        hDriver::ze_driver_handle_t,
        phParallelOperation::Ptr{ze_rtas_parallel_operation_exp_handle_t})::ze_result_t
end

@checked function zeRTASParallelOperationGetPropertiesExp(hParallelOperation, pProperties)
    @ccall libze_loader.zeRTASParallelOperationGetPropertiesExp(
        hParallelOperation::ze_rtas_parallel_operation_exp_handle_t,
        pProperties::Ptr{ze_rtas_parallel_operation_exp_properties_t})::ze_result_t
end

@checked function zeRTASParallelOperationJoinExp(hParallelOperation)
    @ccall libze_loader.zeRTASParallelOperationJoinExp(
        hParallelOperation::ze_rtas_parallel_operation_exp_handle_t)::ze_result_t
end

@checked function zeRTASParallelOperationDestroyExp(hParallelOperation)
    @ccall libze_loader.zeRTASParallelOperationDestroyExp(
        hParallelOperation::ze_rtas_parallel_operation_exp_handle_t)::ze_result_t
end

# --- counter-based event pools (experimental) ---

@cenum _ze_event_pool_counter_based_exp_version_t::UInt32 begin
    ZE_EVENT_POOL_COUNTER_BASED_EXP_VERSION_1_0 = 65536
    ZE_EVENT_POOL_COUNTER_BASED_EXP_VERSION_CURRENT = 65536
    ZE_EVENT_POOL_COUNTER_BASED_EXP_VERSION_FORCE_UINT32 = 2147483647
end

const ze_event_pool_counter_based_exp_version_t = _ze_event_pool_counter_based_exp_version_t

@cenum _ze_event_pool_counter_based_exp_flag_t::UInt32 begin
    ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE = 1
    ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_NON_IMMEDIATE = 2
    ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_FORCE_UINT32 = 2147483647
end

const ze_event_pool_counter_based_exp_flag_t = _ze_event_pool_counter_based_exp_flag_t

# --- bindless images (experimental) ---

@cenum _ze_bindless_image_exp_version_t::UInt32 begin
    ZE_BINDLESS_IMAGE_EXP_VERSION_1_0 = 65536
    ZE_BINDLESS_IMAGE_EXP_VERSION_CURRENT = 65536
    ZE_BINDLESS_IMAGE_EXP_VERSION_FORCE_UINT32 = 2147483647
end

const ze_bindless_image_exp_version_t = _ze_bindless_image_exp_version_t

@cenum _ze_image_bindless_exp_flag_t::UInt32 begin
    ZE_IMAGE_BINDLESS_EXP_FLAG_BINDLESS = 1
    ZE_IMAGE_BINDLESS_EXP_FLAG_SAMPLED_IMAGE = 2
    ZE_IMAGE_BINDLESS_EXP_FLAG_FORCE_UINT32 = 2147483647
end

const ze_image_bindless_exp_flag_t = _ze_image_bindless_exp_flag_t

# Query the row pitch required for a 2D image of the given dimensions.
@checked function zeMemGetPitchFor2dImage(hContext, hDevice, imageWidth, imageHeight,
                                          elementSizeInBytes, rowPitch)
    @ccall libze_loader.zeMemGetPitchFor2dImage(
        hContext::ze_context_handle_t,
        hDevice::ze_device_handle_t,
        imageWidth::Csize_t,
        imageHeight::Csize_t,
        elementSizeInBytes::Cuint,
        rowPitch::Ptr{Csize_t})::ze_result_t
end

@checked function zeImageGetDeviceOffsetExp(hImage, pDeviceOffset)
    @ccall libze_loader.zeImageGetDeviceOffsetExp(
        hImage::ze_image_handle_t,
        pDeviceOffset::Ptr{UInt64})::ze_result_t
end

# --- command-list cloning (experimental) ---

@cenum _ze_command_list_clone_exp_version_t::UInt32 begin
    ZE_COMMAND_LIST_CLONE_EXP_VERSION_1_0 = 65536
    ZE_COMMAND_LIST_CLONE_EXP_VERSION_CURRENT = 65536
    ZE_COMMAND_LIST_CLONE_EXP_VERSION_FORCE_UINT32 = 2147483647
end

const ze_command_list_clone_exp_version_t = _ze_command_list_clone_exp_version_t

@checked function zeCommandListCreateCloneExp(hCommandList, phClonedCommandList)
    @ccall libze_loader.zeCommandListCreateCloneExp(
        hCommandList::ze_command_list_handle_t,
        phClonedCommandList::Ptr{ze_command_list_handle_t})::ze_result_t
end

# --- immediate command-list append (experimental) ---

@cenum _ze_immediate_command_list_append_exp_version_t::UInt32 begin
    ZE_IMMEDIATE_COMMAND_LIST_APPEND_EXP_VERSION_1_0 = 65536
    ZE_IMMEDIATE_COMMAND_LIST_APPEND_EXP_VERSION_CURRENT = 65536
    ZE_IMMEDIATE_COMMAND_LIST_APPEND_EXP_VERSION_FORCE_UINT32 = 2147483647
end

const ze_immediate_command_list_append_exp_version_t = _ze_immediate_command_list_append_exp_version_t

@checked function zeCommandListImmediateAppendCommandListsExp(hCommandListImmediate,
                                                              numCommandLists, phCommandLists,
                                                              hSignalEvent, numWaitEvents,
                                                              phWaitEvents)
    @ccall libze_loader.zeCommandListImmediateAppendCommandListsExp(
        hCommandListImmediate::ze_command_list_handle_t,
        numCommandLists::UInt32,
        phCommandLists::Ptr{ze_command_list_handle_t},
        hSignalEvent::ze_event_handle_t,
        numWaitEvents::UInt32,
        phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t
end

# --- mutable command lists (experimental); note this extension has a 1.1
# revision (65537), unlike the 1.0-only extensions above ---

@cenum _ze_mutable_command_list_exp_version_t::UInt32 begin
    ZE_MUTABLE_COMMAND_LIST_EXP_VERSION_1_0 = 65536
    ZE_MUTABLE_COMMAND_LIST_EXP_VERSION_1_1 = 65537
    ZE_MUTABLE_COMMAND_LIST_EXP_VERSION_CURRENT = 65537
    ZE_MUTABLE_COMMAND_LIST_EXP_VERSION_FORCE_UINT32 = 2147483647
end

const ze_mutable_command_list_exp_version_t = _ze_mutable_command_list_exp_version_t

@cenum _ze_mutable_command_exp_flag_t::UInt32 begin
    ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS = 1
    ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT = 2
    ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE = 4
    ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET = 8
    ZE_MUTABLE_COMMAND_EXP_FLAG_SIGNAL_EVENT = 16
    ZE_MUTABLE_COMMAND_EXP_FLAG_WAIT_EVENTS = 32
    ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_INSTRUCTION = 64
    ZE_MUTABLE_COMMAND_EXP_FLAG_GRAPH_ARGUMENTS = 128
    ZE_MUTABLE_COMMAND_EXP_FLAG_FORCE_UINT32 = 2147483647
end

const ze_mutable_command_exp_flag_t = _ze_mutable_command_exp_flag_t

@cenum _ze_mutable_command_list_exp_flag_t::UInt32 begin
    ZE_MUTABLE_COMMAND_LIST_EXP_FLAG_RESERVED = 1
    ZE_MUTABLE_COMMAND_LIST_EXP_FLAG_FORCE_UINT32 = 2147483647
end

const ze_mutable_command_list_exp_flag_t = _ze_mutable_command_list_exp_flag_t

@checked function zeCommandListGetNextCommandIdExp(hCommandList, desc, pCommandId)
    @ccall libze_loader.zeCommandListGetNextCommandIdExp(
        hCommandList::ze_command_list_handle_t,
        desc::Ptr{ze_mutable_command_id_exp_desc_t},
        pCommandId::Ptr{UInt64})::ze_result_t
end

@checked function zeCommandListGetNextCommandIdWithKernelsExp(hCommandList, desc, numKernels,
                                                              phKernels, pCommandId)
    @ccall libze_loader.zeCommandListGetNextCommandIdWithKernelsExp(
        hCommandList::ze_command_list_handle_t,
        desc::Ptr{ze_mutable_command_id_exp_desc_t},
        numKernels::UInt32,
        phKernels::Ptr{ze_kernel_handle_t},
        pCommandId::Ptr{UInt64})::ze_result_t
end

@checked function zeCommandListUpdateMutableCommandsExp(hCommandList, desc)
    @ccall libze_loader.zeCommandListUpdateMutableCommandsExp(
        hCommandList::ze_command_list_handle_t,
        desc::Ptr{ze_mutable_commands_exp_desc_t})::ze_result_t
end

@checked function zeCommandListUpdateMutableCommandSignalEventExp(hCommandList, commandId,
                                                                  hSignalEvent)
    @ccall libze_loader.zeCommandListUpdateMutableCommandSignalEventExp(
        hCommandList::ze_command_list_handle_t,
        commandId::UInt64,
        hSignalEvent::ze_event_handle_t)::ze_result_t
end

@checked function zeCommandListUpdateMutableCommandWaitEventsExp(hCommandList, commandId,
                                                                 numWaitEvents, phWaitEvents)
    @ccall libze_loader.zeCommandListUpdateMutableCommandWaitEventsExp(
        hCommandList::ze_command_list_handle_t,
        commandId::UInt64,
        numWaitEvents::UInt32,
        phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t
end

@checked function zeCommandListUpdateMutableCommandKernelsExp(hCommandList, numKernels,
                                                              pCommandId, phKernels)
    @ccall libze_loader.zeCommandListUpdateMutableCommandKernelsExp(
        hCommandList::ze_command_list_handle_t,
        numKernels::UInt32,
        pCommandId::Ptr{UInt64},
        phKernels::Ptr{ze_kernel_handle_t})::ze_result_t
end

# --- API-tracing callback support: each `*_params_t` struct mirrors a C struct
# of pointers to the traced call's arguments; the `ze_pfn*Cb_t` typedefs are
# raw C function pointers, hence `Ptr{Cvoid}` ---

struct _ze_init_params_t
    pflags::Ptr{ze_init_flags_t}
end

const ze_init_params_t = _ze_init_params_t

# typedef void ( ZE_APICALL * ze_pfnInitCb_t ) ( ze_init_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnInitCb_t = Ptr{Cvoid}

struct _ze_global_callbacks_t
    pfnInitCb::ze_pfnInitCb_t
end

const ze_global_callbacks_t = _ze_global_callbacks_t

struct _ze_driver_get_params_t
    ppCount::Ptr{Ptr{UInt32}}
    pphDrivers::Ptr{Ptr{ze_driver_handle_t}}
end

const ze_driver_get_params_t = _ze_driver_get_params_t

# typedef void ( ZE_APICALL * ze_pfnDriverGetCb_t ) ( ze_driver_get_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnDriverGetCb_t = Ptr{Cvoid}

struct _ze_driver_get_api_version_params_t
    phDriver::Ptr{ze_driver_handle_t}
    pversion::Ptr{Ptr{ze_api_version_t}}
end

const ze_driver_get_api_version_params_t = _ze_driver_get_api_version_params_t

# typedef void ( ZE_APICALL * ze_pfnDriverGetApiVersionCb_t ) ( ze_driver_get_api_version_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnDriverGetApiVersionCb_t = Ptr{Cvoid}

# Tracing-callback parameter structs for the Driver API: each `p`-prefixed field
# points at the corresponding argument of the intercepted call (see the C
# `typedef` comments below). Callback function-pointer types are opaque
# `Ptr{Cvoid}` on the Julia side.
struct _ze_driver_get_properties_params_t
    phDriver::Ptr{ze_driver_handle_t}
    ppDriverProperties::Ptr{Ptr{ze_driver_properties_t}}
end

const ze_driver_get_properties_params_t = _ze_driver_get_properties_params_t

# typedef void ( ZE_APICALL * ze_pfnDriverGetPropertiesCb_t ) ( ze_driver_get_properties_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnDriverGetPropertiesCb_t = Ptr{Cvoid}

struct _ze_driver_get_ipc_properties_params_t
    phDriver::Ptr{ze_driver_handle_t}
    ppIpcProperties::Ptr{Ptr{ze_driver_ipc_properties_t}}
end

const ze_driver_get_ipc_properties_params_t = _ze_driver_get_ipc_properties_params_t

# typedef void ( ZE_APICALL * ze_pfnDriverGetIpcPropertiesCb_t ) ( ze_driver_get_ipc_properties_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnDriverGetIpcPropertiesCb_t = Ptr{Cvoid}

struct _ze_driver_get_extension_properties_params_t
    phDriver::Ptr{ze_driver_handle_t}
    ppCount::Ptr{Ptr{UInt32}}
    ppExtensionProperties::Ptr{Ptr{ze_driver_extension_properties_t}}
end

const ze_driver_get_extension_properties_params_t = _ze_driver_get_extension_properties_params_t

# typedef void ( ZE_APICALL * ze_pfnDriverGetExtensionPropertiesCb_t ) ( ze_driver_get_extension_properties_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnDriverGetExtensionPropertiesCb_t = Ptr{Cvoid}

# Table of per-entry-point tracer callbacks for the Driver API.
struct _ze_driver_callbacks_t
    pfnGetCb::ze_pfnDriverGetCb_t
    pfnGetApiVersionCb::ze_pfnDriverGetApiVersionCb_t
    pfnGetPropertiesCb::ze_pfnDriverGetPropertiesCb_t
    pfnGetIpcPropertiesCb::ze_pfnDriverGetIpcPropertiesCb_t
    pfnGetExtensionPropertiesCb::ze_pfnDriverGetExtensionPropertiesCb_t
end

const ze_driver_callbacks_t = _ze_driver_callbacks_t

struct _ze_device_get_params_t
    phDriver::Ptr{ze_driver_handle_t}
    ppCount::Ptr{Ptr{UInt32}}
    pphDevices::Ptr{Ptr{ze_device_handle_t}}
end
const ze_device_get_params_t = _ze_device_get_params_t

# Tracing-callback parameter structs and callback tables for the Device,
# Context, CommandQueue, CommandList, and Image APIs. Each `p`-prefixed field
# points at the corresponding argument of the intercepted call; the `pTracer*`
# parameters in the C typedefs carry tracer user data. Looks auto-generated
# from ze_api.h — presumably by the wrapper generator in res/; do not edit by
# hand (TODO confirm generator before modifying).

# typedef void ( ZE_APICALL * ze_pfnDeviceGetCb_t ) ( ze_device_get_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnDeviceGetCb_t = Ptr{Cvoid}

struct _ze_device_get_sub_devices_params_t
    phDevice::Ptr{ze_device_handle_t}
    ppCount::Ptr{Ptr{UInt32}}
    pphSubdevices::Ptr{Ptr{ze_device_handle_t}}
end

const ze_device_get_sub_devices_params_t = _ze_device_get_sub_devices_params_t

# typedef void ( ZE_APICALL * ze_pfnDeviceGetSubDevicesCb_t ) ( ze_device_get_sub_devices_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnDeviceGetSubDevicesCb_t = Ptr{Cvoid}

struct _ze_device_get_properties_params_t
    phDevice::Ptr{ze_device_handle_t}
    ppDeviceProperties::Ptr{Ptr{ze_device_properties_t}}
end

const ze_device_get_properties_params_t = _ze_device_get_properties_params_t

# typedef void ( ZE_APICALL * ze_pfnDeviceGetPropertiesCb_t ) ( ze_device_get_properties_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnDeviceGetPropertiesCb_t = Ptr{Cvoid}

struct _ze_device_get_compute_properties_params_t
    phDevice::Ptr{ze_device_handle_t}
    ppComputeProperties::Ptr{Ptr{ze_device_compute_properties_t}}
end

const ze_device_get_compute_properties_params_t = _ze_device_get_compute_properties_params_t

# typedef void ( ZE_APICALL * ze_pfnDeviceGetComputePropertiesCb_t ) ( ze_device_get_compute_properties_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnDeviceGetComputePropertiesCb_t = Ptr{Cvoid}

struct _ze_device_get_module_properties_params_t
    phDevice::Ptr{ze_device_handle_t}
    ppModuleProperties::Ptr{Ptr{ze_device_module_properties_t}}
end

const ze_device_get_module_properties_params_t = _ze_device_get_module_properties_params_t

# typedef void ( ZE_APICALL * ze_pfnDeviceGetModulePropertiesCb_t ) ( ze_device_get_module_properties_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnDeviceGetModulePropertiesCb_t = Ptr{Cvoid}

struct _ze_device_get_command_queue_group_properties_params_t
    phDevice::Ptr{ze_device_handle_t}
    ppCount::Ptr{Ptr{UInt32}}
    ppCommandQueueGroupProperties::Ptr{Ptr{ze_command_queue_group_properties_t}}
end

const ze_device_get_command_queue_group_properties_params_t = _ze_device_get_command_queue_group_properties_params_t

# typedef void ( ZE_APICALL * ze_pfnDeviceGetCommandQueueGroupPropertiesCb_t ) ( ze_device_get_command_queue_group_properties_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnDeviceGetCommandQueueGroupPropertiesCb_t = Ptr{Cvoid}

struct _ze_device_get_memory_properties_params_t
    phDevice::Ptr{ze_device_handle_t}
    ppCount::Ptr{Ptr{UInt32}}
    ppMemProperties::Ptr{Ptr{ze_device_memory_properties_t}}
end

const ze_device_get_memory_properties_params_t = _ze_device_get_memory_properties_params_t

# typedef void ( ZE_APICALL * ze_pfnDeviceGetMemoryPropertiesCb_t ) ( ze_device_get_memory_properties_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnDeviceGetMemoryPropertiesCb_t = Ptr{Cvoid}

struct _ze_device_get_memory_access_properties_params_t
    phDevice::Ptr{ze_device_handle_t}
    ppMemAccessProperties::Ptr{Ptr{ze_device_memory_access_properties_t}}
end

const ze_device_get_memory_access_properties_params_t = _ze_device_get_memory_access_properties_params_t

# typedef void ( ZE_APICALL * ze_pfnDeviceGetMemoryAccessPropertiesCb_t ) ( ze_device_get_memory_access_properties_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnDeviceGetMemoryAccessPropertiesCb_t = Ptr{Cvoid}

struct _ze_device_get_cache_properties_params_t
    phDevice::Ptr{ze_device_handle_t}
    ppCount::Ptr{Ptr{UInt32}}
    ppCacheProperties::Ptr{Ptr{ze_device_cache_properties_t}}
end

const ze_device_get_cache_properties_params_t = _ze_device_get_cache_properties_params_t

# typedef void ( ZE_APICALL * ze_pfnDeviceGetCachePropertiesCb_t ) ( ze_device_get_cache_properties_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnDeviceGetCachePropertiesCb_t = Ptr{Cvoid}

struct _ze_device_get_image_properties_params_t
    phDevice::Ptr{ze_device_handle_t}
    ppImageProperties::Ptr{Ptr{ze_device_image_properties_t}}
end

const ze_device_get_image_properties_params_t = _ze_device_get_image_properties_params_t

# typedef void ( ZE_APICALL * ze_pfnDeviceGetImagePropertiesCb_t ) ( ze_device_get_image_properties_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnDeviceGetImagePropertiesCb_t = Ptr{Cvoid}

struct _ze_device_get_external_memory_properties_params_t
    phDevice::Ptr{ze_device_handle_t}
    ppExternalMemoryProperties::Ptr{Ptr{ze_device_external_memory_properties_t}}
end

const ze_device_get_external_memory_properties_params_t = _ze_device_get_external_memory_properties_params_t

# typedef void ( ZE_APICALL * ze_pfnDeviceGetExternalMemoryPropertiesCb_t ) ( ze_device_get_external_memory_properties_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnDeviceGetExternalMemoryPropertiesCb_t = Ptr{Cvoid}

struct _ze_device_get_p2_p_properties_params_t
    phDevice::Ptr{ze_device_handle_t}
    phPeerDevice::Ptr{ze_device_handle_t}
    ppP2PProperties::Ptr{Ptr{ze_device_p2p_properties_t}}
end

const ze_device_get_p2_p_properties_params_t = _ze_device_get_p2_p_properties_params_t

# typedef void ( ZE_APICALL * ze_pfnDeviceGetP2PPropertiesCb_t ) ( ze_device_get_p2_p_properties_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnDeviceGetP2PPropertiesCb_t = Ptr{Cvoid}

struct _ze_device_can_access_peer_params_t
    phDevice::Ptr{ze_device_handle_t}
    phPeerDevice::Ptr{ze_device_handle_t}
    pvalue::Ptr{Ptr{ze_bool_t}}
end

const ze_device_can_access_peer_params_t = _ze_device_can_access_peer_params_t

# typedef void ( ZE_APICALL * ze_pfnDeviceCanAccessPeerCb_t ) ( ze_device_can_access_peer_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnDeviceCanAccessPeerCb_t = Ptr{Cvoid}

struct _ze_device_get_status_params_t
    phDevice::Ptr{ze_device_handle_t}
end

const ze_device_get_status_params_t = _ze_device_get_status_params_t

# typedef void ( ZE_APICALL * ze_pfnDeviceGetStatusCb_t ) ( ze_device_get_status_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnDeviceGetStatusCb_t = Ptr{Cvoid}

# Table of per-entry-point tracer callbacks for the Device API.
struct _ze_device_callbacks_t
    pfnGetCb::ze_pfnDeviceGetCb_t
    pfnGetSubDevicesCb::ze_pfnDeviceGetSubDevicesCb_t
    pfnGetPropertiesCb::ze_pfnDeviceGetPropertiesCb_t
    pfnGetComputePropertiesCb::ze_pfnDeviceGetComputePropertiesCb_t
    pfnGetModulePropertiesCb::ze_pfnDeviceGetModulePropertiesCb_t
    pfnGetCommandQueueGroupPropertiesCb::ze_pfnDeviceGetCommandQueueGroupPropertiesCb_t
    pfnGetMemoryPropertiesCb::ze_pfnDeviceGetMemoryPropertiesCb_t
    pfnGetMemoryAccessPropertiesCb::ze_pfnDeviceGetMemoryAccessPropertiesCb_t
    pfnGetCachePropertiesCb::ze_pfnDeviceGetCachePropertiesCb_t
    pfnGetImagePropertiesCb::ze_pfnDeviceGetImagePropertiesCb_t
    pfnGetExternalMemoryPropertiesCb::ze_pfnDeviceGetExternalMemoryPropertiesCb_t
    pfnGetP2PPropertiesCb::ze_pfnDeviceGetP2PPropertiesCb_t
    pfnCanAccessPeerCb::ze_pfnDeviceCanAccessPeerCb_t
    pfnGetStatusCb::ze_pfnDeviceGetStatusCb_t
end

const ze_device_callbacks_t = _ze_device_callbacks_t

struct _ze_context_create_params_t
    phDriver::Ptr{ze_driver_handle_t}
    pdesc::Ptr{Ptr{ze_context_desc_t}}
    pphContext::Ptr{Ptr{ze_context_handle_t}}
end

const ze_context_create_params_t = _ze_context_create_params_t

# typedef void ( ZE_APICALL * ze_pfnContextCreateCb_t ) ( ze_context_create_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnContextCreateCb_t = Ptr{Cvoid}

struct _ze_context_destroy_params_t
    phContext::Ptr{ze_context_handle_t}
end

const ze_context_destroy_params_t = _ze_context_destroy_params_t

# typedef void ( ZE_APICALL * ze_pfnContextDestroyCb_t ) ( ze_context_destroy_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnContextDestroyCb_t = Ptr{Cvoid}

struct _ze_context_get_status_params_t
    phContext::Ptr{ze_context_handle_t}
end

const ze_context_get_status_params_t = _ze_context_get_status_params_t

# typedef void ( ZE_APICALL * ze_pfnContextGetStatusCb_t ) ( ze_context_get_status_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnContextGetStatusCb_t = Ptr{Cvoid}

struct _ze_context_system_barrier_params_t
    phContext::Ptr{ze_context_handle_t}
    phDevice::Ptr{ze_device_handle_t}
end

const ze_context_system_barrier_params_t = _ze_context_system_barrier_params_t

# typedef void ( ZE_APICALL * ze_pfnContextSystemBarrierCb_t ) ( ze_context_system_barrier_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnContextSystemBarrierCb_t = Ptr{Cvoid}

struct _ze_context_make_memory_resident_params_t
    phContext::Ptr{ze_context_handle_t}
    phDevice::Ptr{ze_device_handle_t}
    pptr::Ptr{Ptr{Cvoid}}
    psize::Ptr{Csize_t}
end

const ze_context_make_memory_resident_params_t = _ze_context_make_memory_resident_params_t

# typedef void ( ZE_APICALL * ze_pfnContextMakeMemoryResidentCb_t ) ( ze_context_make_memory_resident_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnContextMakeMemoryResidentCb_t = Ptr{Cvoid}

struct _ze_context_evict_memory_params_t
    phContext::Ptr{ze_context_handle_t}
    phDevice::Ptr{ze_device_handle_t}
    pptr::Ptr{Ptr{Cvoid}}
    psize::Ptr{Csize_t}
end

const ze_context_evict_memory_params_t = _ze_context_evict_memory_params_t

# typedef void ( ZE_APICALL * ze_pfnContextEvictMemoryCb_t ) ( ze_context_evict_memory_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnContextEvictMemoryCb_t = Ptr{Cvoid}

struct _ze_context_make_image_resident_params_t
    phContext::Ptr{ze_context_handle_t}
    phDevice::Ptr{ze_device_handle_t}
    phImage::Ptr{ze_image_handle_t}
end

const ze_context_make_image_resident_params_t = _ze_context_make_image_resident_params_t

# typedef void ( ZE_APICALL * ze_pfnContextMakeImageResidentCb_t ) ( ze_context_make_image_resident_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnContextMakeImageResidentCb_t = Ptr{Cvoid}

struct _ze_context_evict_image_params_t
    phContext::Ptr{ze_context_handle_t}
    phDevice::Ptr{ze_device_handle_t}
    phImage::Ptr{ze_image_handle_t}
end

const ze_context_evict_image_params_t = _ze_context_evict_image_params_t

# typedef void ( ZE_APICALL * ze_pfnContextEvictImageCb_t ) ( ze_context_evict_image_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnContextEvictImageCb_t = Ptr{Cvoid}

# Table of per-entry-point tracer callbacks for the Context API.
struct _ze_context_callbacks_t
    pfnCreateCb::ze_pfnContextCreateCb_t
    pfnDestroyCb::ze_pfnContextDestroyCb_t
    pfnGetStatusCb::ze_pfnContextGetStatusCb_t
    pfnSystemBarrierCb::ze_pfnContextSystemBarrierCb_t
    pfnMakeMemoryResidentCb::ze_pfnContextMakeMemoryResidentCb_t
    pfnEvictMemoryCb::ze_pfnContextEvictMemoryCb_t
    pfnMakeImageResidentCb::ze_pfnContextMakeImageResidentCb_t
    pfnEvictImageCb::ze_pfnContextEvictImageCb_t
end

const ze_context_callbacks_t = _ze_context_callbacks_t

struct _ze_command_queue_create_params_t
    phContext::Ptr{ze_context_handle_t}
    phDevice::Ptr{ze_device_handle_t}
    pdesc::Ptr{Ptr{ze_command_queue_desc_t}}
    pphCommandQueue::Ptr{Ptr{ze_command_queue_handle_t}}
end

const ze_command_queue_create_params_t = _ze_command_queue_create_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandQueueCreateCb_t ) ( ze_command_queue_create_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandQueueCreateCb_t = Ptr{Cvoid}

struct _ze_command_queue_destroy_params_t
    phCommandQueue::Ptr{ze_command_queue_handle_t}
end

const ze_command_queue_destroy_params_t = _ze_command_queue_destroy_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandQueueDestroyCb_t ) ( ze_command_queue_destroy_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandQueueDestroyCb_t = Ptr{Cvoid}

struct _ze_command_queue_execute_command_lists_params_t
    phCommandQueue::Ptr{ze_command_queue_handle_t}
    pnumCommandLists::Ptr{UInt32}
    pphCommandLists::Ptr{Ptr{ze_command_list_handle_t}}
    phFence::Ptr{ze_fence_handle_t}
end

const ze_command_queue_execute_command_lists_params_t = _ze_command_queue_execute_command_lists_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandQueueExecuteCommandListsCb_t ) ( ze_command_queue_execute_command_lists_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandQueueExecuteCommandListsCb_t = Ptr{Cvoid}

struct _ze_command_queue_synchronize_params_t
    phCommandQueue::Ptr{ze_command_queue_handle_t}
    ptimeout::Ptr{UInt64}
end

const ze_command_queue_synchronize_params_t = _ze_command_queue_synchronize_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandQueueSynchronizeCb_t ) ( ze_command_queue_synchronize_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandQueueSynchronizeCb_t = Ptr{Cvoid}

# Table of per-entry-point tracer callbacks for the CommandQueue API.
struct _ze_command_queue_callbacks_t
    pfnCreateCb::ze_pfnCommandQueueCreateCb_t
    pfnDestroyCb::ze_pfnCommandQueueDestroyCb_t
    pfnExecuteCommandListsCb::ze_pfnCommandQueueExecuteCommandListsCb_t
    pfnSynchronizeCb::ze_pfnCommandQueueSynchronizeCb_t
end

const ze_command_queue_callbacks_t = _ze_command_queue_callbacks_t

struct _ze_command_list_create_params_t
    phContext::Ptr{ze_context_handle_t}
    phDevice::Ptr{ze_device_handle_t}
    pdesc::Ptr{Ptr{ze_command_list_desc_t}}
    pphCommandList::Ptr{Ptr{ze_command_list_handle_t}}
end

const ze_command_list_create_params_t = _ze_command_list_create_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListCreateCb_t ) ( ze_command_list_create_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListCreateCb_t = Ptr{Cvoid}

struct _ze_command_list_create_immediate_params_t
    phContext::Ptr{ze_context_handle_t}
    phDevice::Ptr{ze_device_handle_t}
    paltdesc::Ptr{Ptr{ze_command_queue_desc_t}}
    pphCommandList::Ptr{Ptr{ze_command_list_handle_t}}
end

const ze_command_list_create_immediate_params_t = _ze_command_list_create_immediate_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListCreateImmediateCb_t ) ( ze_command_list_create_immediate_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListCreateImmediateCb_t = Ptr{Cvoid}

struct _ze_command_list_destroy_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
end

const ze_command_list_destroy_params_t = _ze_command_list_destroy_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListDestroyCb_t ) ( ze_command_list_destroy_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListDestroyCb_t = Ptr{Cvoid}

struct _ze_command_list_close_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
end

const ze_command_list_close_params_t = _ze_command_list_close_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListCloseCb_t ) ( ze_command_list_close_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListCloseCb_t = Ptr{Cvoid}

struct _ze_command_list_reset_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
end

const ze_command_list_reset_params_t = _ze_command_list_reset_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListResetCb_t ) ( ze_command_list_reset_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListResetCb_t = Ptr{Cvoid}

struct _ze_command_list_append_write_global_timestamp_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
    pdstptr::Ptr{Ptr{UInt64}}
    phSignalEvent::Ptr{ze_event_handle_t}
    pnumWaitEvents::Ptr{UInt32}
    pphWaitEvents::Ptr{Ptr{ze_event_handle_t}}
end

const ze_command_list_append_write_global_timestamp_params_t = _ze_command_list_append_write_global_timestamp_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListAppendWriteGlobalTimestampCb_t ) ( ze_command_list_append_write_global_timestamp_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListAppendWriteGlobalTimestampCb_t = Ptr{Cvoid}

struct _ze_command_list_append_barrier_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
    phSignalEvent::Ptr{ze_event_handle_t}
    pnumWaitEvents::Ptr{UInt32}
    pphWaitEvents::Ptr{Ptr{ze_event_handle_t}}
end

const ze_command_list_append_barrier_params_t = _ze_command_list_append_barrier_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListAppendBarrierCb_t ) ( ze_command_list_append_barrier_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListAppendBarrierCb_t = Ptr{Cvoid}

struct _ze_command_list_append_memory_ranges_barrier_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
    pnumRanges::Ptr{UInt32}
    ppRangeSizes::Ptr{Ptr{Csize_t}}
    ppRanges::Ptr{Ptr{Ptr{Cvoid}}}
    phSignalEvent::Ptr{ze_event_handle_t}
    pnumWaitEvents::Ptr{UInt32}
    pphWaitEvents::Ptr{Ptr{ze_event_handle_t}}
end

const ze_command_list_append_memory_ranges_barrier_params_t = _ze_command_list_append_memory_ranges_barrier_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListAppendMemoryRangesBarrierCb_t ) ( ze_command_list_append_memory_ranges_barrier_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListAppendMemoryRangesBarrierCb_t = Ptr{Cvoid}

struct _ze_command_list_append_memory_copy_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
    pdstptr::Ptr{Ptr{Cvoid}}
    psrcptr::Ptr{Ptr{Cvoid}}
    psize::Ptr{Csize_t}
    phSignalEvent::Ptr{ze_event_handle_t}
    pnumWaitEvents::Ptr{UInt32}
    pphWaitEvents::Ptr{Ptr{ze_event_handle_t}}
end

const ze_command_list_append_memory_copy_params_t = _ze_command_list_append_memory_copy_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListAppendMemoryCopyCb_t ) ( ze_command_list_append_memory_copy_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListAppendMemoryCopyCb_t = Ptr{Cvoid}

struct _ze_command_list_append_memory_fill_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
    pptr::Ptr{Ptr{Cvoid}}
    ppattern::Ptr{Ptr{Cvoid}}
    ppattern_size::Ptr{Csize_t}
    psize::Ptr{Csize_t}
    phSignalEvent::Ptr{ze_event_handle_t}
    pnumWaitEvents::Ptr{UInt32}
    pphWaitEvents::Ptr{Ptr{ze_event_handle_t}}
end

const ze_command_list_append_memory_fill_params_t = _ze_command_list_append_memory_fill_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListAppendMemoryFillCb_t ) ( ze_command_list_append_memory_fill_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListAppendMemoryFillCb_t = Ptr{Cvoid}

struct _ze_command_list_append_memory_copy_region_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
    pdstptr::Ptr{Ptr{Cvoid}}
    pdstRegion::Ptr{Ptr{ze_copy_region_t}}
    pdstPitch::Ptr{UInt32}
    pdstSlicePitch::Ptr{UInt32}
    psrcptr::Ptr{Ptr{Cvoid}}
    psrcRegion::Ptr{Ptr{ze_copy_region_t}}
    psrcPitch::Ptr{UInt32}
    psrcSlicePitch::Ptr{UInt32}
    phSignalEvent::Ptr{ze_event_handle_t}
    pnumWaitEvents::Ptr{UInt32}
    pphWaitEvents::Ptr{Ptr{ze_event_handle_t}}
end

const ze_command_list_append_memory_copy_region_params_t = _ze_command_list_append_memory_copy_region_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListAppendMemoryCopyRegionCb_t ) ( ze_command_list_append_memory_copy_region_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListAppendMemoryCopyRegionCb_t = Ptr{Cvoid}

struct _ze_command_list_append_memory_copy_from_context_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
    pdstptr::Ptr{Ptr{Cvoid}}
    phContextSrc::Ptr{ze_context_handle_t}
    psrcptr::Ptr{Ptr{Cvoid}}
    psize::Ptr{Csize_t}
    phSignalEvent::Ptr{ze_event_handle_t}
    pnumWaitEvents::Ptr{UInt32}
    pphWaitEvents::Ptr{Ptr{ze_event_handle_t}}
end

const ze_command_list_append_memory_copy_from_context_params_t = _ze_command_list_append_memory_copy_from_context_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListAppendMemoryCopyFromContextCb_t ) ( ze_command_list_append_memory_copy_from_context_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListAppendMemoryCopyFromContextCb_t = Ptr{Cvoid}

struct _ze_command_list_append_image_copy_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
    phDstImage::Ptr{ze_image_handle_t}
    phSrcImage::Ptr{ze_image_handle_t}
    phSignalEvent::Ptr{ze_event_handle_t}
    pnumWaitEvents::Ptr{UInt32}
    pphWaitEvents::Ptr{Ptr{ze_event_handle_t}}
end

const ze_command_list_append_image_copy_params_t = _ze_command_list_append_image_copy_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListAppendImageCopyCb_t ) ( ze_command_list_append_image_copy_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListAppendImageCopyCb_t = Ptr{Cvoid}

struct _ze_command_list_append_image_copy_region_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
    phDstImage::Ptr{ze_image_handle_t}
    phSrcImage::Ptr{ze_image_handle_t}
    ppDstRegion::Ptr{Ptr{ze_image_region_t}}
    ppSrcRegion::Ptr{Ptr{ze_image_region_t}}
    phSignalEvent::Ptr{ze_event_handle_t}
    pnumWaitEvents::Ptr{UInt32}
    pphWaitEvents::Ptr{Ptr{ze_event_handle_t}}
end

const ze_command_list_append_image_copy_region_params_t = _ze_command_list_append_image_copy_region_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListAppendImageCopyRegionCb_t ) ( ze_command_list_append_image_copy_region_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListAppendImageCopyRegionCb_t = Ptr{Cvoid}

struct _ze_command_list_append_image_copy_to_memory_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
    pdstptr::Ptr{Ptr{Cvoid}}
    phSrcImage::Ptr{ze_image_handle_t}
    ppSrcRegion::Ptr{Ptr{ze_image_region_t}}
    phSignalEvent::Ptr{ze_event_handle_t}
    pnumWaitEvents::Ptr{UInt32}
    pphWaitEvents::Ptr{Ptr{ze_event_handle_t}}
end

const ze_command_list_append_image_copy_to_memory_params_t = _ze_command_list_append_image_copy_to_memory_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListAppendImageCopyToMemoryCb_t ) ( ze_command_list_append_image_copy_to_memory_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListAppendImageCopyToMemoryCb_t = Ptr{Cvoid}

struct _ze_command_list_append_image_copy_from_memory_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
    phDstImage::Ptr{ze_image_handle_t}
    psrcptr::Ptr{Ptr{Cvoid}}
    ppDstRegion::Ptr{Ptr{ze_image_region_t}}
    phSignalEvent::Ptr{ze_event_handle_t}
    pnumWaitEvents::Ptr{UInt32}
    pphWaitEvents::Ptr{Ptr{ze_event_handle_t}}
end

const ze_command_list_append_image_copy_from_memory_params_t = _ze_command_list_append_image_copy_from_memory_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListAppendImageCopyFromMemoryCb_t ) ( ze_command_list_append_image_copy_from_memory_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListAppendImageCopyFromMemoryCb_t = Ptr{Cvoid}

struct _ze_command_list_append_memory_prefetch_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
    pptr::Ptr{Ptr{Cvoid}}
    psize::Ptr{Csize_t}
end

const ze_command_list_append_memory_prefetch_params_t = _ze_command_list_append_memory_prefetch_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListAppendMemoryPrefetchCb_t ) ( ze_command_list_append_memory_prefetch_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListAppendMemoryPrefetchCb_t = Ptr{Cvoid}

struct _ze_command_list_append_mem_advise_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
    phDevice::Ptr{ze_device_handle_t}
    pptr::Ptr{Ptr{Cvoid}}
    psize::Ptr{Csize_t}
    padvice::Ptr{ze_memory_advice_t}
end

const ze_command_list_append_mem_advise_params_t = _ze_command_list_append_mem_advise_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListAppendMemAdviseCb_t ) ( ze_command_list_append_mem_advise_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListAppendMemAdviseCb_t = Ptr{Cvoid}

struct _ze_command_list_append_signal_event_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
    phEvent::Ptr{ze_event_handle_t}
end

const ze_command_list_append_signal_event_params_t = _ze_command_list_append_signal_event_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListAppendSignalEventCb_t ) ( ze_command_list_append_signal_event_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListAppendSignalEventCb_t = Ptr{Cvoid}

struct _ze_command_list_append_wait_on_events_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
    pnumEvents::Ptr{UInt32}
    pphEvents::Ptr{Ptr{ze_event_handle_t}}
end

const ze_command_list_append_wait_on_events_params_t = _ze_command_list_append_wait_on_events_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListAppendWaitOnEventsCb_t ) ( ze_command_list_append_wait_on_events_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListAppendWaitOnEventsCb_t = Ptr{Cvoid}

struct _ze_command_list_append_event_reset_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
    phEvent::Ptr{ze_event_handle_t}
end

const ze_command_list_append_event_reset_params_t = _ze_command_list_append_event_reset_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListAppendEventResetCb_t ) ( ze_command_list_append_event_reset_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListAppendEventResetCb_t = Ptr{Cvoid}

struct _ze_command_list_append_query_kernel_timestamps_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
    pnumEvents::Ptr{UInt32}
    pphEvents::Ptr{Ptr{ze_event_handle_t}}
    pdstptr::Ptr{Ptr{Cvoid}}
    ppOffsets::Ptr{Ptr{Csize_t}}
    phSignalEvent::Ptr{ze_event_handle_t}
    pnumWaitEvents::Ptr{UInt32}
    pphWaitEvents::Ptr{Ptr{ze_event_handle_t}}
end

const ze_command_list_append_query_kernel_timestamps_params_t = _ze_command_list_append_query_kernel_timestamps_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListAppendQueryKernelTimestampsCb_t ) ( ze_command_list_append_query_kernel_timestamps_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListAppendQueryKernelTimestampsCb_t = Ptr{Cvoid}

struct _ze_command_list_append_launch_kernel_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
    phKernel::Ptr{ze_kernel_handle_t}
    ppLaunchFuncArgs::Ptr{Ptr{ze_group_count_t}}
    phSignalEvent::Ptr{ze_event_handle_t}
    pnumWaitEvents::Ptr{UInt32}
    pphWaitEvents::Ptr{Ptr{ze_event_handle_t}}
end

const ze_command_list_append_launch_kernel_params_t = _ze_command_list_append_launch_kernel_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListAppendLaunchKernelCb_t ) ( ze_command_list_append_launch_kernel_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListAppendLaunchKernelCb_t = Ptr{Cvoid}

struct _ze_command_list_append_launch_cooperative_kernel_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
    phKernel::Ptr{ze_kernel_handle_t}
    ppLaunchFuncArgs::Ptr{Ptr{ze_group_count_t}}
    phSignalEvent::Ptr{ze_event_handle_t}
    pnumWaitEvents::Ptr{UInt32}
    pphWaitEvents::Ptr{Ptr{ze_event_handle_t}}
end

const ze_command_list_append_launch_cooperative_kernel_params_t = _ze_command_list_append_launch_cooperative_kernel_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListAppendLaunchCooperativeKernelCb_t ) ( ze_command_list_append_launch_cooperative_kernel_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListAppendLaunchCooperativeKernelCb_t = Ptr{Cvoid}

struct _ze_command_list_append_launch_kernel_indirect_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
    phKernel::Ptr{ze_kernel_handle_t}
    ppLaunchArgumentsBuffer::Ptr{Ptr{ze_group_count_t}}
    phSignalEvent::Ptr{ze_event_handle_t}
    pnumWaitEvents::Ptr{UInt32}
    pphWaitEvents::Ptr{Ptr{ze_event_handle_t}}
end

const ze_command_list_append_launch_kernel_indirect_params_t = _ze_command_list_append_launch_kernel_indirect_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListAppendLaunchKernelIndirectCb_t ) ( ze_command_list_append_launch_kernel_indirect_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListAppendLaunchKernelIndirectCb_t = Ptr{Cvoid}

struct _ze_command_list_append_launch_multiple_kernels_indirect_params_t
    phCommandList::Ptr{ze_command_list_handle_t}
    pnumKernels::Ptr{UInt32}
    pphKernels::Ptr{Ptr{ze_kernel_handle_t}}
    ppCountBuffer::Ptr{Ptr{UInt32}}
    ppLaunchArgumentsBuffer::Ptr{Ptr{ze_group_count_t}}
    phSignalEvent::Ptr{ze_event_handle_t}
    pnumWaitEvents::Ptr{UInt32}
    pphWaitEvents::Ptr{Ptr{ze_event_handle_t}}
end

const ze_command_list_append_launch_multiple_kernels_indirect_params_t = _ze_command_list_append_launch_multiple_kernels_indirect_params_t

# typedef void ( ZE_APICALL * ze_pfnCommandListAppendLaunchMultipleKernelsIndirectCb_t ) ( ze_command_list_append_launch_multiple_kernels_indirect_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnCommandListAppendLaunchMultipleKernelsIndirectCb_t = Ptr{Cvoid}

# Table of per-entry-point tracer callbacks for the CommandList API.
struct _ze_command_list_callbacks_t
    pfnCreateCb::ze_pfnCommandListCreateCb_t
    pfnCreateImmediateCb::ze_pfnCommandListCreateImmediateCb_t
    pfnDestroyCb::ze_pfnCommandListDestroyCb_t
    pfnCloseCb::ze_pfnCommandListCloseCb_t
    pfnResetCb::ze_pfnCommandListResetCb_t
    pfnAppendWriteGlobalTimestampCb::ze_pfnCommandListAppendWriteGlobalTimestampCb_t
    pfnAppendBarrierCb::ze_pfnCommandListAppendBarrierCb_t
    pfnAppendMemoryRangesBarrierCb::ze_pfnCommandListAppendMemoryRangesBarrierCb_t
    pfnAppendMemoryCopyCb::ze_pfnCommandListAppendMemoryCopyCb_t
    pfnAppendMemoryFillCb::ze_pfnCommandListAppendMemoryFillCb_t
    pfnAppendMemoryCopyRegionCb::ze_pfnCommandListAppendMemoryCopyRegionCb_t
    pfnAppendMemoryCopyFromContextCb::ze_pfnCommandListAppendMemoryCopyFromContextCb_t
    pfnAppendImageCopyCb::ze_pfnCommandListAppendImageCopyCb_t
    pfnAppendImageCopyRegionCb::ze_pfnCommandListAppendImageCopyRegionCb_t
    pfnAppendImageCopyToMemoryCb::ze_pfnCommandListAppendImageCopyToMemoryCb_t
    pfnAppendImageCopyFromMemoryCb::ze_pfnCommandListAppendImageCopyFromMemoryCb_t
    pfnAppendMemoryPrefetchCb::ze_pfnCommandListAppendMemoryPrefetchCb_t
    pfnAppendMemAdviseCb::ze_pfnCommandListAppendMemAdviseCb_t
    pfnAppendSignalEventCb::ze_pfnCommandListAppendSignalEventCb_t
    pfnAppendWaitOnEventsCb::ze_pfnCommandListAppendWaitOnEventsCb_t
    pfnAppendEventResetCb::ze_pfnCommandListAppendEventResetCb_t
    pfnAppendQueryKernelTimestampsCb::ze_pfnCommandListAppendQueryKernelTimestampsCb_t
    pfnAppendLaunchKernelCb::ze_pfnCommandListAppendLaunchKernelCb_t
    pfnAppendLaunchCooperativeKernelCb::ze_pfnCommandListAppendLaunchCooperativeKernelCb_t
    pfnAppendLaunchKernelIndirectCb::ze_pfnCommandListAppendLaunchKernelIndirectCb_t
    pfnAppendLaunchMultipleKernelsIndirectCb::ze_pfnCommandListAppendLaunchMultipleKernelsIndirectCb_t
end

const ze_command_list_callbacks_t = _ze_command_list_callbacks_t

struct _ze_image_get_properties_params_t
    phDevice::Ptr{ze_device_handle_t}
    pdesc::Ptr{Ptr{ze_image_desc_t}}
    ppImageProperties::Ptr{Ptr{ze_image_properties_t}}
end

const ze_image_get_properties_params_t = _ze_image_get_properties_params_t

# typedef void ( ZE_APICALL * ze_pfnImageGetPropertiesCb_t ) ( ze_image_get_properties_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnImageGetPropertiesCb_t = Ptr{Cvoid}

struct _ze_image_create_params_t
    phContext::Ptr{ze_context_handle_t}
    phDevice::Ptr{ze_device_handle_t}
    pdesc::Ptr{Ptr{ze_image_desc_t}}
    pphImage::Ptr{Ptr{ze_image_handle_t}}
end

const ze_image_create_params_t = _ze_image_create_params_t

# typedef void ( ZE_APICALL * ze_pfnImageCreateCb_t ) ( ze_image_create_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnImageCreateCb_t = Ptr{Cvoid}

struct _ze_image_destroy_params_t
    phImage::Ptr{ze_image_handle_t}
end

const ze_image_destroy_params_t = _ze_image_destroy_params_t

# typedef void ( ZE_APICALL * ze_pfnImageDestroyCb_t ) ( ze_image_destroy_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnImageDestroyCb_t = Ptr{Cvoid}

# Table of per-entry-point tracer callbacks for the Image API.
struct _ze_image_callbacks_t
    pfnGetPropertiesCb::ze_pfnImageGetPropertiesCb_t
    pfnCreateCb::ze_pfnImageCreateCb_t
    pfnDestroyCb::ze_pfnImageDestroyCb_t
end

const ze_image_callbacks_t = _ze_image_callbacks_t

struct
_ze_mem_alloc_shared_params_t phContext::Ptr{ze_context_handle_t} pdevice_desc::Ptr{Ptr{ze_device_mem_alloc_desc_t}} phost_desc::Ptr{Ptr{ze_host_mem_alloc_desc_t}} psize::Ptr{Csize_t} palignment::Ptr{Csize_t} phDevice::Ptr{ze_device_handle_t} ppptr::Ptr{Ptr{Ptr{Cvoid}}} end const ze_mem_alloc_shared_params_t = _ze_mem_alloc_shared_params_t # typedef void ( ZE_APICALL * ze_pfnMemAllocSharedCb_t ) ( ze_mem_alloc_shared_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData ) const ze_pfnMemAllocSharedCb_t = Ptr{Cvoid} struct _ze_mem_alloc_device_params_t phContext::Ptr{ze_context_handle_t} pdevice_desc::Ptr{Ptr{ze_device_mem_alloc_desc_t}} psize::Ptr{Csize_t} palignment::Ptr{Csize_t} phDevice::Ptr{ze_device_handle_t} ppptr::Ptr{Ptr{Ptr{Cvoid}}} end const ze_mem_alloc_device_params_t = _ze_mem_alloc_device_params_t # typedef void ( ZE_APICALL * ze_pfnMemAllocDeviceCb_t ) ( ze_mem_alloc_device_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData ) const ze_pfnMemAllocDeviceCb_t = Ptr{Cvoid} struct _ze_mem_alloc_host_params_t phContext::Ptr{ze_context_handle_t} phost_desc::Ptr{Ptr{ze_host_mem_alloc_desc_t}} psize::Ptr{Csize_t} palignment::Ptr{Csize_t} ppptr::Ptr{Ptr{Ptr{Cvoid}}} end const ze_mem_alloc_host_params_t = _ze_mem_alloc_host_params_t # typedef void ( ZE_APICALL * ze_pfnMemAllocHostCb_t ) ( ze_mem_alloc_host_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData ) const ze_pfnMemAllocHostCb_t = Ptr{Cvoid} struct _ze_mem_free_params_t phContext::Ptr{ze_context_handle_t} pptr::Ptr{Ptr{Cvoid}} end const ze_mem_free_params_t = _ze_mem_free_params_t # typedef void ( ZE_APICALL * ze_pfnMemFreeCb_t ) ( ze_mem_free_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData ) const ze_pfnMemFreeCb_t = Ptr{Cvoid} struct _ze_mem_get_alloc_properties_params_t phContext::Ptr{ze_context_handle_t} 
pptr::Ptr{Ptr{Cvoid}} ppMemAllocProperties::Ptr{Ptr{ze_memory_allocation_properties_t}} pphDevice::Ptr{Ptr{ze_device_handle_t}} end const ze_mem_get_alloc_properties_params_t = _ze_mem_get_alloc_properties_params_t # typedef void ( ZE_APICALL * ze_pfnMemGetAllocPropertiesCb_t ) ( ze_mem_get_alloc_properties_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData ) const ze_pfnMemGetAllocPropertiesCb_t = Ptr{Cvoid} struct _ze_mem_get_address_range_params_t phContext::Ptr{ze_context_handle_t} pptr::Ptr{Ptr{Cvoid}} ppBase::Ptr{Ptr{Ptr{Cvoid}}} ppSize::Ptr{Ptr{Csize_t}} end const ze_mem_get_address_range_params_t = _ze_mem_get_address_range_params_t # typedef void ( ZE_APICALL * ze_pfnMemGetAddressRangeCb_t ) ( ze_mem_get_address_range_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData ) const ze_pfnMemGetAddressRangeCb_t = Ptr{Cvoid} struct _ze_mem_get_ipc_handle_params_t phContext::Ptr{ze_context_handle_t} pptr::Ptr{Ptr{Cvoid}} ppIpcHandle::Ptr{Ptr{ze_ipc_mem_handle_t}} end const ze_mem_get_ipc_handle_params_t = _ze_mem_get_ipc_handle_params_t # typedef void ( ZE_APICALL * ze_pfnMemGetIpcHandleCb_t ) ( ze_mem_get_ipc_handle_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData ) const ze_pfnMemGetIpcHandleCb_t = Ptr{Cvoid} struct _ze_mem_open_ipc_handle_params_t phContext::Ptr{ze_context_handle_t} phDevice::Ptr{ze_device_handle_t} phandle::Ptr{ze_ipc_mem_handle_t} pflags::Ptr{ze_ipc_memory_flags_t} ppptr::Ptr{Ptr{Ptr{Cvoid}}} end const ze_mem_open_ipc_handle_params_t = _ze_mem_open_ipc_handle_params_t # typedef void ( ZE_APICALL * ze_pfnMemOpenIpcHandleCb_t ) ( ze_mem_open_ipc_handle_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData ) const ze_pfnMemOpenIpcHandleCb_t = Ptr{Cvoid} struct _ze_mem_close_ipc_handle_params_t phContext::Ptr{ze_context_handle_t} pptr::Ptr{Ptr{Cvoid}} end 
const ze_mem_close_ipc_handle_params_t = _ze_mem_close_ipc_handle_params_t

# Auto-generated from ze_api.h by Clang.jl — tracing-callback parameter structs
# and callback tables for the Mem / Fence / EventPool API groups.

# typedef void ( ZE_APICALL * ze_pfnMemCloseIpcHandleCb_t ) ( ze_mem_close_ipc_handle_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnMemCloseIpcHandleCb_t = Ptr{Cvoid}

# Table of per-entry-point tracing callbacks for the Mem API group.
struct _ze_mem_callbacks_t
    pfnAllocSharedCb::ze_pfnMemAllocSharedCb_t
    pfnAllocDeviceCb::ze_pfnMemAllocDeviceCb_t
    pfnAllocHostCb::ze_pfnMemAllocHostCb_t
    pfnFreeCb::ze_pfnMemFreeCb_t
    pfnGetAllocPropertiesCb::ze_pfnMemGetAllocPropertiesCb_t
    pfnGetAddressRangeCb::ze_pfnMemGetAddressRangeCb_t
    pfnGetIpcHandleCb::ze_pfnMemGetIpcHandleCb_t
    pfnOpenIpcHandleCb::ze_pfnMemOpenIpcHandleCb_t
    pfnCloseIpcHandleCb::ze_pfnMemCloseIpcHandleCb_t
end

const ze_mem_callbacks_t = _ze_mem_callbacks_t

struct _ze_fence_create_params_t
    phCommandQueue::Ptr{ze_command_queue_handle_t}
    pdesc::Ptr{Ptr{ze_fence_desc_t}}
    pphFence::Ptr{Ptr{ze_fence_handle_t}}
end

const ze_fence_create_params_t = _ze_fence_create_params_t

# typedef void ( ZE_APICALL * ze_pfnFenceCreateCb_t ) ( ze_fence_create_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnFenceCreateCb_t = Ptr{Cvoid}

struct _ze_fence_destroy_params_t
    phFence::Ptr{ze_fence_handle_t}
end

const ze_fence_destroy_params_t = _ze_fence_destroy_params_t

# typedef void ( ZE_APICALL * ze_pfnFenceDestroyCb_t ) ( ze_fence_destroy_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnFenceDestroyCb_t = Ptr{Cvoid}

struct _ze_fence_host_synchronize_params_t
    phFence::Ptr{ze_fence_handle_t}
    ptimeout::Ptr{UInt64}
end

const ze_fence_host_synchronize_params_t = _ze_fence_host_synchronize_params_t

# typedef void ( ZE_APICALL * ze_pfnFenceHostSynchronizeCb_t ) ( ze_fence_host_synchronize_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnFenceHostSynchronizeCb_t = Ptr{Cvoid}

struct _ze_fence_query_status_params_t
    phFence::Ptr{ze_fence_handle_t}
end

const ze_fence_query_status_params_t = _ze_fence_query_status_params_t

# typedef void ( ZE_APICALL * ze_pfnFenceQueryStatusCb_t ) ( ze_fence_query_status_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnFenceQueryStatusCb_t = Ptr{Cvoid}

struct _ze_fence_reset_params_t
    phFence::Ptr{ze_fence_handle_t}
end

const ze_fence_reset_params_t = _ze_fence_reset_params_t

# typedef void ( ZE_APICALL * ze_pfnFenceResetCb_t ) ( ze_fence_reset_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnFenceResetCb_t = Ptr{Cvoid}

# Table of per-entry-point tracing callbacks for the Fence API group.
struct _ze_fence_callbacks_t
    pfnCreateCb::ze_pfnFenceCreateCb_t
    pfnDestroyCb::ze_pfnFenceDestroyCb_t
    pfnHostSynchronizeCb::ze_pfnFenceHostSynchronizeCb_t
    pfnQueryStatusCb::ze_pfnFenceQueryStatusCb_t
    pfnResetCb::ze_pfnFenceResetCb_t
end

const ze_fence_callbacks_t = _ze_fence_callbacks_t

struct _ze_event_pool_create_params_t
    phContext::Ptr{ze_context_handle_t}
    pdesc::Ptr{Ptr{ze_event_pool_desc_t}}
    pnumDevices::Ptr{UInt32}
    pphDevices::Ptr{Ptr{ze_device_handle_t}}
    pphEventPool::Ptr{Ptr{ze_event_pool_handle_t}}
end

const ze_event_pool_create_params_t = _ze_event_pool_create_params_t

# typedef void ( ZE_APICALL * ze_pfnEventPoolCreateCb_t ) ( ze_event_pool_create_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnEventPoolCreateCb_t = Ptr{Cvoid}

struct _ze_event_pool_destroy_params_t
    phEventPool::Ptr{ze_event_pool_handle_t}
end

const ze_event_pool_destroy_params_t = _ze_event_pool_destroy_params_t

# typedef void ( ZE_APICALL * ze_pfnEventPoolDestroyCb_t ) ( ze_event_pool_destroy_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnEventPoolDestroyCb_t = Ptr{Cvoid}

struct _ze_event_pool_get_ipc_handle_params_t
    phEventPool::Ptr{ze_event_pool_handle_t}
    pphIpc::Ptr{Ptr{ze_ipc_event_pool_handle_t}}
end

const ze_event_pool_get_ipc_handle_params_t = _ze_event_pool_get_ipc_handle_params_t

# typedef void ( ZE_APICALL * ze_pfnEventPoolGetIpcHandleCb_t ) ( ze_event_pool_get_ipc_handle_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnEventPoolGetIpcHandleCb_t = Ptr{Cvoid}

struct _ze_event_pool_open_ipc_handle_params_t
    phContext::Ptr{ze_context_handle_t}
    phIpc::Ptr{ze_ipc_event_pool_handle_t}
    pphEventPool::Ptr{Ptr{ze_event_pool_handle_t}}
end

const ze_event_pool_open_ipc_handle_params_t = _ze_event_pool_open_ipc_handle_params_t

# typedef void ( ZE_APICALL * ze_pfnEventPoolOpenIpcHandleCb_t ) ( ze_event_pool_open_ipc_handle_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnEventPoolOpenIpcHandleCb_t = Ptr{Cvoid}

struct _ze_event_pool_close_ipc_handle_params_t
    phEventPool::Ptr{ze_event_pool_handle_t}
end

const ze_event_pool_close_ipc_handle_params_t = _ze_event_pool_close_ipc_handle_params_t

# typedef void ( ZE_APICALL * ze_pfnEventPoolCloseIpcHandleCb_t ) ( ze_event_pool_close_ipc_handle_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnEventPoolCloseIpcHandleCb_t = Ptr{Cvoid}

# Table of per-entry-point tracing callbacks for the EventPool API group.
struct _ze_event_pool_callbacks_t
    pfnCreateCb::ze_pfnEventPoolCreateCb_t
    pfnDestroyCb::ze_pfnEventPoolDestroyCb_t
    pfnGetIpcHandleCb::ze_pfnEventPoolGetIpcHandleCb_t
    pfnOpenIpcHandleCb::ze_pfnEventPoolOpenIpcHandleCb_t
    pfnCloseIpcHandleCb::ze_pfnEventPoolCloseIpcHandleCb_t
end

const ze_event_pool_callbacks_t = _ze_event_pool_callbacks_t

struct _ze_event_create_params_t
    phEventPool::Ptr{ze_event_pool_handle_t}
    pdesc::Ptr{Ptr{ze_event_desc_t}}
    pphEvent::Ptr{Ptr{ze_event_handle_t}}
end

const ze_event_create_params_t = _ze_event_create_params_t

# typedef void ( ZE_APICALL * ze_pfnEventCreateCb_t ) ( ze_event_create_params_t * params , ze_result_t result , void *
# pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnEventCreateCb_t = Ptr{Cvoid}

# Auto-generated from ze_api.h by Clang.jl — tracing-callback parameter structs
# and callback tables for the Event / Module API groups.

struct _ze_event_destroy_params_t
    phEvent::Ptr{ze_event_handle_t}
end

const ze_event_destroy_params_t = _ze_event_destroy_params_t

# typedef void ( ZE_APICALL * ze_pfnEventDestroyCb_t ) ( ze_event_destroy_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnEventDestroyCb_t = Ptr{Cvoid}

struct _ze_event_host_signal_params_t
    phEvent::Ptr{ze_event_handle_t}
end

const ze_event_host_signal_params_t = _ze_event_host_signal_params_t

# typedef void ( ZE_APICALL * ze_pfnEventHostSignalCb_t ) ( ze_event_host_signal_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnEventHostSignalCb_t = Ptr{Cvoid}

struct _ze_event_host_synchronize_params_t
    phEvent::Ptr{ze_event_handle_t}
    ptimeout::Ptr{UInt64}
end

const ze_event_host_synchronize_params_t = _ze_event_host_synchronize_params_t

# typedef void ( ZE_APICALL * ze_pfnEventHostSynchronizeCb_t ) ( ze_event_host_synchronize_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnEventHostSynchronizeCb_t = Ptr{Cvoid}

struct _ze_event_query_status_params_t
    phEvent::Ptr{ze_event_handle_t}
end

const ze_event_query_status_params_t = _ze_event_query_status_params_t

# typedef void ( ZE_APICALL * ze_pfnEventQueryStatusCb_t ) ( ze_event_query_status_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnEventQueryStatusCb_t = Ptr{Cvoid}

struct _ze_event_host_reset_params_t
    phEvent::Ptr{ze_event_handle_t}
end

const ze_event_host_reset_params_t = _ze_event_host_reset_params_t

# typedef void ( ZE_APICALL * ze_pfnEventHostResetCb_t ) ( ze_event_host_reset_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnEventHostResetCb_t = Ptr{Cvoid}

struct _ze_event_query_kernel_timestamp_params_t
    phEvent::Ptr{ze_event_handle_t}
    pdstptr::Ptr{Ptr{ze_kernel_timestamp_result_t}}
end

const ze_event_query_kernel_timestamp_params_t = _ze_event_query_kernel_timestamp_params_t

# typedef void ( ZE_APICALL * ze_pfnEventQueryKernelTimestampCb_t ) ( ze_event_query_kernel_timestamp_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnEventQueryKernelTimestampCb_t = Ptr{Cvoid}

# Table of per-entry-point tracing callbacks for the Event API group.
struct _ze_event_callbacks_t
    pfnCreateCb::ze_pfnEventCreateCb_t
    pfnDestroyCb::ze_pfnEventDestroyCb_t
    pfnHostSignalCb::ze_pfnEventHostSignalCb_t
    pfnHostSynchronizeCb::ze_pfnEventHostSynchronizeCb_t
    pfnQueryStatusCb::ze_pfnEventQueryStatusCb_t
    pfnHostResetCb::ze_pfnEventHostResetCb_t
    pfnQueryKernelTimestampCb::ze_pfnEventQueryKernelTimestampCb_t
end

const ze_event_callbacks_t = _ze_event_callbacks_t

struct _ze_module_create_params_t
    phContext::Ptr{ze_context_handle_t}
    phDevice::Ptr{ze_device_handle_t}
    pdesc::Ptr{Ptr{ze_module_desc_t}}
    pphModule::Ptr{Ptr{ze_module_handle_t}}
    pphBuildLog::Ptr{Ptr{ze_module_build_log_handle_t}}
end

const ze_module_create_params_t = _ze_module_create_params_t

# typedef void ( ZE_APICALL * ze_pfnModuleCreateCb_t ) ( ze_module_create_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnModuleCreateCb_t = Ptr{Cvoid}

struct _ze_module_destroy_params_t
    phModule::Ptr{ze_module_handle_t}
end

const ze_module_destroy_params_t = _ze_module_destroy_params_t

# typedef void ( ZE_APICALL * ze_pfnModuleDestroyCb_t ) ( ze_module_destroy_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnModuleDestroyCb_t = Ptr{Cvoid}

struct _ze_module_dynamic_link_params_t
    pnumModules::Ptr{UInt32}
    pphModules::Ptr{Ptr{ze_module_handle_t}}
    pphLinkLog::Ptr{Ptr{ze_module_build_log_handle_t}}
end

const ze_module_dynamic_link_params_t = _ze_module_dynamic_link_params_t

# typedef void (
ZE_APICALL * ze_pfnModuleDynamicLinkCb_t ) ( ze_module_dynamic_link_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData ) const ze_pfnModuleDynamicLinkCb_t = Ptr{Cvoid} struct _ze_module_get_native_binary_params_t phModule::Ptr{ze_module_handle_t} ppSize::Ptr{Ptr{Csize_t}} ppModuleNativeBinary::Ptr{Ptr{UInt8}} end const ze_module_get_native_binary_params_t = _ze_module_get_native_binary_params_t # typedef void ( ZE_APICALL * ze_pfnModuleGetNativeBinaryCb_t ) ( ze_module_get_native_binary_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData ) const ze_pfnModuleGetNativeBinaryCb_t = Ptr{Cvoid} struct _ze_module_get_global_pointer_params_t phModule::Ptr{ze_module_handle_t} ppGlobalName::Ptr{Ptr{Cchar}} ppSize::Ptr{Ptr{Csize_t}} ppptr::Ptr{Ptr{Ptr{Cvoid}}} end const ze_module_get_global_pointer_params_t = _ze_module_get_global_pointer_params_t # typedef void ( ZE_APICALL * ze_pfnModuleGetGlobalPointerCb_t ) ( ze_module_get_global_pointer_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData ) const ze_pfnModuleGetGlobalPointerCb_t = Ptr{Cvoid} struct _ze_module_get_kernel_names_params_t phModule::Ptr{ze_module_handle_t} ppCount::Ptr{Ptr{UInt32}} ppNames::Ptr{Ptr{Ptr{Cchar}}} end const ze_module_get_kernel_names_params_t = _ze_module_get_kernel_names_params_t # typedef void ( ZE_APICALL * ze_pfnModuleGetKernelNamesCb_t ) ( ze_module_get_kernel_names_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData ) const ze_pfnModuleGetKernelNamesCb_t = Ptr{Cvoid} struct _ze_module_get_properties_params_t phModule::Ptr{ze_module_handle_t} ppModuleProperties::Ptr{Ptr{ze_module_properties_t}} end const ze_module_get_properties_params_t = _ze_module_get_properties_params_t # typedef void ( ZE_APICALL * ze_pfnModuleGetPropertiesCb_t ) ( ze_module_get_properties_params_t * params , ze_result_t result , 
# void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnModuleGetPropertiesCb_t = Ptr{Cvoid}

# Auto-generated from ze_api.h by Clang.jl — Module/ModuleBuildLog callback
# tables and Kernel tracing-callback parameter structs.

struct _ze_module_get_function_pointer_params_t
    phModule::Ptr{ze_module_handle_t}
    ppFunctionName::Ptr{Ptr{Cchar}}
    ppfnFunction::Ptr{Ptr{Ptr{Cvoid}}}
end

const ze_module_get_function_pointer_params_t = _ze_module_get_function_pointer_params_t

# typedef void ( ZE_APICALL * ze_pfnModuleGetFunctionPointerCb_t ) ( ze_module_get_function_pointer_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnModuleGetFunctionPointerCb_t = Ptr{Cvoid}

# Table of per-entry-point tracing callbacks for the Module API group.
struct _ze_module_callbacks_t
    pfnCreateCb::ze_pfnModuleCreateCb_t
    pfnDestroyCb::ze_pfnModuleDestroyCb_t
    pfnDynamicLinkCb::ze_pfnModuleDynamicLinkCb_t
    pfnGetNativeBinaryCb::ze_pfnModuleGetNativeBinaryCb_t
    pfnGetGlobalPointerCb::ze_pfnModuleGetGlobalPointerCb_t
    pfnGetKernelNamesCb::ze_pfnModuleGetKernelNamesCb_t
    pfnGetPropertiesCb::ze_pfnModuleGetPropertiesCb_t
    pfnGetFunctionPointerCb::ze_pfnModuleGetFunctionPointerCb_t
end

const ze_module_callbacks_t = _ze_module_callbacks_t

struct _ze_module_build_log_destroy_params_t
    phModuleBuildLog::Ptr{ze_module_build_log_handle_t}
end

const ze_module_build_log_destroy_params_t = _ze_module_build_log_destroy_params_t

# typedef void ( ZE_APICALL * ze_pfnModuleBuildLogDestroyCb_t ) ( ze_module_build_log_destroy_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnModuleBuildLogDestroyCb_t = Ptr{Cvoid}

struct _ze_module_build_log_get_string_params_t
    phModuleBuildLog::Ptr{ze_module_build_log_handle_t}
    ppSize::Ptr{Ptr{Csize_t}}
    ppBuildLog::Ptr{Ptr{Cchar}}
end

const ze_module_build_log_get_string_params_t = _ze_module_build_log_get_string_params_t

# typedef void ( ZE_APICALL * ze_pfnModuleBuildLogGetStringCb_t ) ( ze_module_build_log_get_string_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnModuleBuildLogGetStringCb_t = Ptr{Cvoid}

# Table of per-entry-point tracing callbacks for the ModuleBuildLog API group.
struct _ze_module_build_log_callbacks_t
    pfnDestroyCb::ze_pfnModuleBuildLogDestroyCb_t
    pfnGetStringCb::ze_pfnModuleBuildLogGetStringCb_t
end

const ze_module_build_log_callbacks_t = _ze_module_build_log_callbacks_t

struct _ze_kernel_create_params_t
    phModule::Ptr{ze_module_handle_t}
    pdesc::Ptr{Ptr{ze_kernel_desc_t}}
    pphKernel::Ptr{Ptr{ze_kernel_handle_t}}
end

const ze_kernel_create_params_t = _ze_kernel_create_params_t

# typedef void ( ZE_APICALL * ze_pfnKernelCreateCb_t ) ( ze_kernel_create_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnKernelCreateCb_t = Ptr{Cvoid}

struct _ze_kernel_destroy_params_t
    phKernel::Ptr{ze_kernel_handle_t}
end

const ze_kernel_destroy_params_t = _ze_kernel_destroy_params_t

# typedef void ( ZE_APICALL * ze_pfnKernelDestroyCb_t ) ( ze_kernel_destroy_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnKernelDestroyCb_t = Ptr{Cvoid}

struct _ze_kernel_set_cache_config_params_t
    phKernel::Ptr{ze_kernel_handle_t}
    pflags::Ptr{ze_cache_config_flags_t}
end

const ze_kernel_set_cache_config_params_t = _ze_kernel_set_cache_config_params_t

# typedef void ( ZE_APICALL * ze_pfnKernelSetCacheConfigCb_t ) ( ze_kernel_set_cache_config_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnKernelSetCacheConfigCb_t = Ptr{Cvoid}

struct _ze_kernel_set_group_size_params_t
    phKernel::Ptr{ze_kernel_handle_t}
    pgroupSizeX::Ptr{UInt32}
    pgroupSizeY::Ptr{UInt32}
    pgroupSizeZ::Ptr{UInt32}
end

const ze_kernel_set_group_size_params_t = _ze_kernel_set_group_size_params_t

# typedef void ( ZE_APICALL * ze_pfnKernelSetGroupSizeCb_t ) ( ze_kernel_set_group_size_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnKernelSetGroupSizeCb_t = Ptr{Cvoid}

struct _ze_kernel_suggest_group_size_params_t
    phKernel::Ptr{ze_kernel_handle_t}
    pglobalSizeX::Ptr{UInt32}
    pglobalSizeY::Ptr{UInt32}
    pglobalSizeZ::Ptr{UInt32}
    pgroupSizeX::Ptr{Ptr{UInt32}}
    pgroupSizeY::Ptr{Ptr{UInt32}}
    pgroupSizeZ::Ptr{Ptr{UInt32}}
end

const ze_kernel_suggest_group_size_params_t = _ze_kernel_suggest_group_size_params_t

# typedef void ( ZE_APICALL * ze_pfnKernelSuggestGroupSizeCb_t ) ( ze_kernel_suggest_group_size_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnKernelSuggestGroupSizeCb_t = Ptr{Cvoid}

struct _ze_kernel_suggest_max_cooperative_group_count_params_t
    phKernel::Ptr{ze_kernel_handle_t}
    ptotalGroupCount::Ptr{Ptr{UInt32}}
end

const ze_kernel_suggest_max_cooperative_group_count_params_t = _ze_kernel_suggest_max_cooperative_group_count_params_t

# typedef void ( ZE_APICALL * ze_pfnKernelSuggestMaxCooperativeGroupCountCb_t ) ( ze_kernel_suggest_max_cooperative_group_count_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnKernelSuggestMaxCooperativeGroupCountCb_t = Ptr{Cvoid}

struct _ze_kernel_set_argument_value_params_t
    phKernel::Ptr{ze_kernel_handle_t}
    pargIndex::Ptr{UInt32}
    pargSize::Ptr{Csize_t}
    ppArgValue::Ptr{Ptr{Cvoid}}
end

const ze_kernel_set_argument_value_params_t = _ze_kernel_set_argument_value_params_t

# typedef void ( ZE_APICALL * ze_pfnKernelSetArgumentValueCb_t ) ( ze_kernel_set_argument_value_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnKernelSetArgumentValueCb_t = Ptr{Cvoid}

struct _ze_kernel_set_indirect_access_params_t
    phKernel::Ptr{ze_kernel_handle_t}
    pflags::Ptr{ze_kernel_indirect_access_flags_t}
end

const ze_kernel_set_indirect_access_params_t = _ze_kernel_set_indirect_access_params_t

# typedef void ( ZE_APICALL * ze_pfnKernelSetIndirectAccessCb_t ) ( ze_kernel_set_indirect_access_params_t * params , ze_result_t result , void *
# pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnKernelSetIndirectAccessCb_t = Ptr{Cvoid}

# Auto-generated from ze_api.h by Clang.jl — Kernel get-* parameter structs,
# the Kernel callback table, and Sampler / PhysicalMem structs.

struct _ze_kernel_get_indirect_access_params_t
    phKernel::Ptr{ze_kernel_handle_t}
    ppFlags::Ptr{Ptr{ze_kernel_indirect_access_flags_t}}
end

const ze_kernel_get_indirect_access_params_t = _ze_kernel_get_indirect_access_params_t

# typedef void ( ZE_APICALL * ze_pfnKernelGetIndirectAccessCb_t ) ( ze_kernel_get_indirect_access_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnKernelGetIndirectAccessCb_t = Ptr{Cvoid}

struct _ze_kernel_get_source_attributes_params_t
    phKernel::Ptr{ze_kernel_handle_t}
    ppSize::Ptr{Ptr{UInt32}}
    ppString::Ptr{Ptr{Ptr{Cchar}}}
end

const ze_kernel_get_source_attributes_params_t = _ze_kernel_get_source_attributes_params_t

# typedef void ( ZE_APICALL * ze_pfnKernelGetSourceAttributesCb_t ) ( ze_kernel_get_source_attributes_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnKernelGetSourceAttributesCb_t = Ptr{Cvoid}

struct _ze_kernel_get_properties_params_t
    phKernel::Ptr{ze_kernel_handle_t}
    ppKernelProperties::Ptr{Ptr{ze_kernel_properties_t}}
end

const ze_kernel_get_properties_params_t = _ze_kernel_get_properties_params_t

# typedef void ( ZE_APICALL * ze_pfnKernelGetPropertiesCb_t ) ( ze_kernel_get_properties_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnKernelGetPropertiesCb_t = Ptr{Cvoid}

struct _ze_kernel_get_name_params_t
    phKernel::Ptr{ze_kernel_handle_t}
    ppSize::Ptr{Ptr{Csize_t}}
    ppName::Ptr{Ptr{Cchar}}
end

const ze_kernel_get_name_params_t = _ze_kernel_get_name_params_t

# typedef void ( ZE_APICALL * ze_pfnKernelGetNameCb_t ) ( ze_kernel_get_name_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnKernelGetNameCb_t = Ptr{Cvoid}

# Table of per-entry-point tracing callbacks for the Kernel API group.
struct _ze_kernel_callbacks_t
    pfnCreateCb::ze_pfnKernelCreateCb_t
    pfnDestroyCb::ze_pfnKernelDestroyCb_t
    pfnSetCacheConfigCb::ze_pfnKernelSetCacheConfigCb_t
    pfnSetGroupSizeCb::ze_pfnKernelSetGroupSizeCb_t
    pfnSuggestGroupSizeCb::ze_pfnKernelSuggestGroupSizeCb_t
    pfnSuggestMaxCooperativeGroupCountCb::ze_pfnKernelSuggestMaxCooperativeGroupCountCb_t
    pfnSetArgumentValueCb::ze_pfnKernelSetArgumentValueCb_t
    pfnSetIndirectAccessCb::ze_pfnKernelSetIndirectAccessCb_t
    pfnGetIndirectAccessCb::ze_pfnKernelGetIndirectAccessCb_t
    pfnGetSourceAttributesCb::ze_pfnKernelGetSourceAttributesCb_t
    pfnGetPropertiesCb::ze_pfnKernelGetPropertiesCb_t
    pfnGetNameCb::ze_pfnKernelGetNameCb_t
end

const ze_kernel_callbacks_t = _ze_kernel_callbacks_t

struct _ze_sampler_create_params_t
    phContext::Ptr{ze_context_handle_t}
    phDevice::Ptr{ze_device_handle_t}
    pdesc::Ptr{Ptr{ze_sampler_desc_t}}
    pphSampler::Ptr{Ptr{ze_sampler_handle_t}}
end

const ze_sampler_create_params_t = _ze_sampler_create_params_t

# typedef void ( ZE_APICALL * ze_pfnSamplerCreateCb_t ) ( ze_sampler_create_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnSamplerCreateCb_t = Ptr{Cvoid}

struct _ze_sampler_destroy_params_t
    phSampler::Ptr{ze_sampler_handle_t}
end

const ze_sampler_destroy_params_t = _ze_sampler_destroy_params_t

# typedef void ( ZE_APICALL * ze_pfnSamplerDestroyCb_t ) ( ze_sampler_destroy_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnSamplerDestroyCb_t = Ptr{Cvoid}

# Table of per-entry-point tracing callbacks for the Sampler API group.
struct _ze_sampler_callbacks_t
    pfnCreateCb::ze_pfnSamplerCreateCb_t
    pfnDestroyCb::ze_pfnSamplerDestroyCb_t
end

const ze_sampler_callbacks_t = _ze_sampler_callbacks_t

struct _ze_physical_mem_create_params_t
    phContext::Ptr{ze_context_handle_t}
    phDevice::Ptr{ze_device_handle_t}
    pdesc::Ptr{Ptr{ze_physical_mem_desc_t}}
    pphPhysicalMemory::Ptr{Ptr{ze_physical_mem_handle_t}}
end

const ze_physical_mem_create_params_t = _ze_physical_mem_create_params_t

# typedef void (
ZE_APICALL * ze_pfnPhysicalMemCreateCb_t ) ( ze_physical_mem_create_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData ) const ze_pfnPhysicalMemCreateCb_t = Ptr{Cvoid} struct _ze_physical_mem_destroy_params_t phContext::Ptr{ze_context_handle_t} phPhysicalMemory::Ptr{ze_physical_mem_handle_t} end const ze_physical_mem_destroy_params_t = _ze_physical_mem_destroy_params_t # typedef void ( ZE_APICALL * ze_pfnPhysicalMemDestroyCb_t ) ( ze_physical_mem_destroy_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData ) const ze_pfnPhysicalMemDestroyCb_t = Ptr{Cvoid} struct _ze_physical_mem_callbacks_t pfnCreateCb::ze_pfnPhysicalMemCreateCb_t pfnDestroyCb::ze_pfnPhysicalMemDestroyCb_t end const ze_physical_mem_callbacks_t = _ze_physical_mem_callbacks_t struct _ze_virtual_mem_reserve_params_t phContext::Ptr{ze_context_handle_t} ppStart::Ptr{Ptr{Cvoid}} psize::Ptr{Csize_t} ppptr::Ptr{Ptr{Ptr{Cvoid}}} end const ze_virtual_mem_reserve_params_t = _ze_virtual_mem_reserve_params_t # typedef void ( ZE_APICALL * ze_pfnVirtualMemReserveCb_t ) ( ze_virtual_mem_reserve_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData ) const ze_pfnVirtualMemReserveCb_t = Ptr{Cvoid} struct _ze_virtual_mem_free_params_t phContext::Ptr{ze_context_handle_t} pptr::Ptr{Ptr{Cvoid}} psize::Ptr{Csize_t} end const ze_virtual_mem_free_params_t = _ze_virtual_mem_free_params_t # typedef void ( ZE_APICALL * ze_pfnVirtualMemFreeCb_t ) ( ze_virtual_mem_free_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData ) const ze_pfnVirtualMemFreeCb_t = Ptr{Cvoid} struct _ze_virtual_mem_query_page_size_params_t phContext::Ptr{ze_context_handle_t} phDevice::Ptr{ze_device_handle_t} psize::Ptr{Csize_t} ppagesize::Ptr{Ptr{Csize_t}} end const ze_virtual_mem_query_page_size_params_t = _ze_virtual_mem_query_page_size_params_t # typedef void ( 
# ZE_APICALL * ze_pfnVirtualMemQueryPageSizeCb_t ) ( ze_virtual_mem_query_page_size_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnVirtualMemQueryPageSizeCb_t = Ptr{Cvoid}

# Auto-generated from ze_api.h by Clang.jl — remaining VirtualMem structs, the
# top-level `ze_callbacks_t` aggregate, and the API's numeric limits and
# extension-name string constants.

struct _ze_virtual_mem_map_params_t
    phContext::Ptr{ze_context_handle_t}
    pptr::Ptr{Ptr{Cvoid}}
    psize::Ptr{Csize_t}
    phPhysicalMemory::Ptr{ze_physical_mem_handle_t}
    poffset::Ptr{Csize_t}
    paccess::Ptr{ze_memory_access_attribute_t}
end

const ze_virtual_mem_map_params_t = _ze_virtual_mem_map_params_t

# typedef void ( ZE_APICALL * ze_pfnVirtualMemMapCb_t ) ( ze_virtual_mem_map_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnVirtualMemMapCb_t = Ptr{Cvoid}

struct _ze_virtual_mem_unmap_params_t
    phContext::Ptr{ze_context_handle_t}
    pptr::Ptr{Ptr{Cvoid}}
    psize::Ptr{Csize_t}
end

const ze_virtual_mem_unmap_params_t = _ze_virtual_mem_unmap_params_t

# typedef void ( ZE_APICALL * ze_pfnVirtualMemUnmapCb_t ) ( ze_virtual_mem_unmap_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnVirtualMemUnmapCb_t = Ptr{Cvoid}

struct _ze_virtual_mem_set_access_attribute_params_t
    phContext::Ptr{ze_context_handle_t}
    pptr::Ptr{Ptr{Cvoid}}
    psize::Ptr{Csize_t}
    paccess::Ptr{ze_memory_access_attribute_t}
end

const ze_virtual_mem_set_access_attribute_params_t = _ze_virtual_mem_set_access_attribute_params_t

# typedef void ( ZE_APICALL * ze_pfnVirtualMemSetAccessAttributeCb_t ) ( ze_virtual_mem_set_access_attribute_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnVirtualMemSetAccessAttributeCb_t = Ptr{Cvoid}

struct _ze_virtual_mem_get_access_attribute_params_t
    phContext::Ptr{ze_context_handle_t}
    pptr::Ptr{Ptr{Cvoid}}
    psize::Ptr{Csize_t}
    paccess::Ptr{Ptr{ze_memory_access_attribute_t}}
    poutSize::Ptr{Ptr{Csize_t}}
end

const ze_virtual_mem_get_access_attribute_params_t = _ze_virtual_mem_get_access_attribute_params_t

# typedef void ( ZE_APICALL * ze_pfnVirtualMemGetAccessAttributeCb_t ) ( ze_virtual_mem_get_access_attribute_params_t * params , ze_result_t result , void * pTracerUserData , void * * ppTracerInstanceUserData )
const ze_pfnVirtualMemGetAccessAttributeCb_t = Ptr{Cvoid}

# Table of per-entry-point tracing callbacks for the VirtualMem API group.
struct _ze_virtual_mem_callbacks_t
    pfnReserveCb::ze_pfnVirtualMemReserveCb_t
    pfnFreeCb::ze_pfnVirtualMemFreeCb_t
    pfnQueryPageSizeCb::ze_pfnVirtualMemQueryPageSizeCb_t
    pfnMapCb::ze_pfnVirtualMemMapCb_t
    pfnUnmapCb::ze_pfnVirtualMemUnmapCb_t
    pfnSetAccessAttributeCb::ze_pfnVirtualMemSetAccessAttributeCb_t
    pfnGetAccessAttributeCb::ze_pfnVirtualMemGetAccessAttributeCb_t
end

const ze_virtual_mem_callbacks_t = _ze_virtual_mem_callbacks_t

# Aggregate of all per-group callback tables (mirrors the C ze_callbacks_t).
struct _ze_callbacks_t
    Global::ze_global_callbacks_t
    Driver::ze_driver_callbacks_t
    Device::ze_device_callbacks_t
    Context::ze_context_callbacks_t
    CommandQueue::ze_command_queue_callbacks_t
    CommandList::ze_command_list_callbacks_t
    Fence::ze_fence_callbacks_t
    EventPool::ze_event_pool_callbacks_t
    Event::ze_event_callbacks_t
    Image::ze_image_callbacks_t
    Module::ze_module_callbacks_t
    ModuleBuildLog::ze_module_build_log_callbacks_t
    Kernel::ze_kernel_callbacks_t
    Sampler::ze_sampler_callbacks_t
    PhysicalMem::ze_physical_mem_callbacks_t
    Mem::ze_mem_callbacks_t
    VirtualMem::ze_virtual_mem_callbacks_t
end

const ze_callbacks_t = _ze_callbacks_t

# Skipping MacroDefinition: ZE_APIEXPORT __attribute__ ( ( visibility ( "default" ) ) )
# Skipping MacroDefinition: ZE_DLLEXPORT __attribute__ ( ( visibility ( "default" ) ) )

const ZE_MAX_IPC_HANDLE_SIZE = 64
const ZE_MAX_UUID_SIZE = 16
const ZE_API_VERSION_CURRENT_M = ZE_MAKE_VERSION(1, 13)
const ZE_MAX_DRIVER_UUID_SIZE = 16
const ZE_MAX_EXTENSION_NAME = 256
const ZE_MAX_DEVICE_UUID_SIZE = 16
const ZE_MAX_DEVICE_NAME = 256
const ZE_SUBGROUPSIZE_COUNT = 8
const ZE_MAX_NATIVE_KERNEL_UUID_SIZE = 16
const ZE_MAX_KERNEL_UUID_SIZE = 16
const ZE_MAX_MODULE_UUID_SIZE = 16
const ZE_MODULE_PROGRAM_EXP_NAME = "ZE_experimental_module_program"
const ZE_RAYTRACING_EXT_NAME = "ZE_extension_raytracing"
const ZE_FLOAT_ATOMICS_EXT_NAME = "ZE_extension_float_atomics"
const ZE_GLOBAL_OFFSET_EXP_NAME = "ZE_experimental_global_offset"
const ZE_RELAXED_ALLOCATION_LIMITS_EXP_NAME = "ZE_experimental_relaxed_allocation_limits"
const ZE_GET_KERNEL_BINARY_EXP_NAME = "ZE_extension_kernel_binary_exp"
const ZE_DRIVER_DDI_HANDLES_EXT_NAME = "ZE_extension_driver_ddi_handles"
const ZE_EXTERNAL_SEMAPHORES_EXTENSION_NAME = "ZE_extension_external_semaphores"
const ZE_CACHELINE_SIZE_EXT_NAME = "ZE_extension_device_cache_line_size"
const ZE_RTAS_EXT_NAME = "ZE_extension_rtas"
const ZE_DEVICE_VECTOR_SIZES_EXT_NAME = "ZE_extension_device_vector_sizes"
const ZE_CACHE_RESERVATION_EXT_NAME = "ZE_extension_cache_reservation"
const ZE_EVENT_QUERY_TIMESTAMPS_EXP_NAME = "ZE_experimental_event_query_timestamps"
const ZE_IMAGE_MEMORY_PROPERTIES_EXP_NAME = "ZE_experimental_image_memory_properties"
const ZE_IMAGE_VIEW_EXT_NAME = "ZE_extension_image_view"
const ZE_IMAGE_VIEW_EXP_NAME = "ZE_experimental_image_view"
const ZE_IMAGE_VIEW_PLANAR_EXT_NAME = "ZE_extension_image_view_planar"
const ZE_IMAGE_VIEW_PLANAR_EXP_NAME = "ZE_experimental_image_view_planar"
const ZE_KERNEL_SCHEDULING_HINTS_EXP_NAME = "ZE_experimental_scheduling_hints"
const ZE_LINKONCE_ODR_EXT_NAME = "ZE_extension_linkonce_odr"
const ZE_CONTEXT_POWER_SAVING_HINT_EXP_NAME = "ZE_experimental_power_saving_hint"
const ZE_SUBGROUPS_EXT_NAME = "ZE_extension_subgroups"
const ZE_EU_COUNT_EXT_NAME = "ZE_extension_eu_count"
const ZE_PCI_PROPERTIES_EXT_NAME = "ZE_extension_pci_properties"
const ZE_SRGB_EXT_NAME = "ZE_extension_srgb"
const ZE_IMAGE_COPY_EXT_NAME = "ZE_extension_image_copy"
const ZE_IMAGE_QUERY_ALLOC_PROPERTIES_EXT_NAME = "ZE_extension_image_query_alloc_properties"
const ZE_LINKAGE_INSPECTION_EXT_NAME = "ZE_extension_linkage_inspection"
const ZE_MEMORY_COMPRESSION_HINTS_EXT_NAME = "ZE_extension_memory_compression_hints"
const ZE_MEMORY_FREE_POLICIES_EXT_NAME = "ZE_extension_memory_free_policies"
const ZE_BANDWIDTH_PROPERTIES_EXP_NAME = "ZE_experimental_bandwidth_properties"
const ZE_DEVICE_LUID_EXT_NAME = "ZE_extension_device_luid"
const ZE_MAX_DEVICE_LUID_SIZE_EXT = 8
const ZE_FABRIC_EXP_NAME = "ZE_experimental_fabric"
const ZE_MAX_FABRIC_EDGE_MODEL_EXP_SIZE = 256
const ZE_DEVICE_MEMORY_PROPERTIES_EXT_NAME = "ZE_extension_device_memory_properties"
const ZE_BFLOAT16_CONVERSIONS_EXT_NAME = "ZE_extension_bfloat16_conversions"
const ZE_DEVICE_IP_VERSION_EXT_NAME = "ZE_extension_device_ip_version"
const ZE_KERNEL_MAX_GROUP_SIZE_PROPERTIES_EXT_NAME = "ZE_extension_kernel_max_group_size_properties"
const ZE_SUB_ALLOCATIONS_EXP_NAME = "ZE_experimental_sub_allocations"
const ZE_EVENT_QUERY_KERNEL_TIMESTAMPS_EXT_NAME = "ZE_extension_event_query_kernel_timestamps"
const ZE_RTAS_BUILDER_EXP_NAME = "ZE_experimental_rtas_builder"
const ZE_EVENT_POOL_COUNTER_BASED_EXP_NAME = "ZE_experimental_event_pool_counter_based"
const ZE_BINDLESS_IMAGE_EXP_NAME = "ZE_experimental_bindless_image"
const ZE_COMMAND_LIST_CLONE_EXP_NAME = "ZE_experimental_command_list_clone"
const ZE_IMMEDIATE_COMMAND_LIST_APPEND_EXP_NAME = "ZE_experimental_immediate_command_list_append"
const ZE_MUTABLE_COMMAND_LIST_EXP_NAME = "ZE_experimental_mutable_command_list"

================================================ FILE: lib/level-zero/libze_aliases.jl ================================================

================================================ FILE: lib/level-zero/memory.jl ================================================

# Raw memory management

export device_alloc, host_alloc, shared_alloc, free, properties, lookup_alloc

#
# untyped buffers
#

# Common supertype for the device/host/shared buffer kinds defined below.
abstract type AbstractBuffer end

# Guard: an untyped buffer must not be converted to a raw pointer implicitly;
# callers should go through the explicit pointer accessors instead.
Base.convert(T::Type{<:Union{Ptr,ZePtr}}, buf::AbstractBuffer) = throw(ArgumentError("Illegal conversion of a $(typeof(buf)) to a $T"))

# ccall integration
#
# taking the pointer of a buffer means returning the underlying pointer,
# and not the pointer of
# the buffer object itself.
Base.unsafe_convert(P::Type{<:Union{Ptr,ZePtr}}, buf::AbstractBuffer) = convert(P, buf)

# Release the allocation backing `buf`. When a `policy` is given, the extended
# free entry point is used so the driver knows how eagerly to reclaim the memory.
function free(buf::AbstractBuffer; policy=nothing)
    ctx = context(buf)
    if policy !== nothing
        free_desc = Ref(ze_memory_free_ext_desc_t(; freePolicy=policy))
        zeMemFreeExt(ctx, free_desc, buf)
    else
        zeMemFree(ctx, buf)
    end
end


## device buffer

"""
    DeviceBuffer

A buffer of device memory, owned by a specific device. Generally, may only be
accessed by the device that owns it.
"""
struct DeviceBuffer <: AbstractBuffer
    ptr::ZePtr{Cvoid}
    bytesize::Int
    context::ZeContext
    device::ZeDevice
end

# Allocate `bytesize` bytes of device-local memory on `dev` within `ctx`.
function device_alloc(ctx::ZeContext, dev::ZeDevice, bytesize::Integer, alignment::Integer=1;
                      flags=0, ordinal::Integer=0)
    relaxed_limits = Ref(ze_relaxed_allocation_limits_exp_desc_t(;
        flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE,
    ))
    GC.@preserve relaxed_limits begin
        alloc_desc = Ref(ze_device_mem_alloc_desc_t(; flags, ordinal))
        # requests beyond the device's advertised limit need the relaxed-limits
        # extension chained into the descriptor's pNext
        if bytesize > properties(dev).maxMemAllocSize
            link_extensions(alloc_desc, relaxed_limits)
        end
        out_ptr = Ref{Ptr{Cvoid}}()
        zeMemAllocDevice(ctx, alloc_desc, bytesize, alignment, dev, out_ptr)
        return DeviceBuffer(reinterpret(ZePtr{Cvoid}, out_ptr[]), bytesize, ctx, dev)
    end
end

Base.pointer(buf::DeviceBuffer) = buf.ptr
Base.sizeof(buf::DeviceBuffer) = buf.bytesize
context(buf::DeviceBuffer) = buf.context
device(buf::DeviceBuffer) = buf.device

Base.show(io::IO, buf::DeviceBuffer) =
    @printf(io, "DeviceBuffer(%s at %p)", Base.format_bytes(sizeof(buf)), pointer(buf))

Base.convert(::Type{ZePtr{T}}, buf::DeviceBuffer) where {T} =
    convert(ZePtr{T}, pointer(buf))


## host buffer

"""
    HostBuffer

A buffer of memory on the host. May be accessed by the host, and all devices within the
host driver. Frequently used as staging areas to transfer data to or from devices.

Note that these buffers need to be made resident to the device, e.g., by using the
ZE_KERNEL_FLAG_FORCE_RESIDENCY module flag, the ZE_KERNEL_SET_ATTR_INDIRECT_HOST_ACCESS
kernel attribute, or by calling zeDeviceMakeMemoryResident.
"""
struct HostBuffer <: AbstractBuffer
    ptr::Ptr{Cvoid}
    bytesize::Int
    context::ZeContext
end

# Allocate `bytesize` bytes of host memory within `ctx`.
function host_alloc(ctx::ZeContext, bytesize::Integer, alignment::Integer=1; flags=0)
    host_desc = Ref(ze_host_mem_alloc_desc_t(; flags))
    out_ptr = Ref{Ptr{Cvoid}}()
    zeMemAllocHost(ctx, host_desc, bytesize, alignment, out_ptr)
    return HostBuffer(out_ptr[], bytesize, ctx)
end

Base.pointer(buf::HostBuffer) = buf.ptr
Base.sizeof(buf::HostBuffer) = buf.bytesize
context(buf::HostBuffer) = buf.context
device(buf::HostBuffer) = nothing

Base.show(io::IO, buf::HostBuffer) =
    @printf(io, "HostBuffer(%s at %p)", Base.format_bytes(sizeof(buf)), Int(pointer(buf)))

Base.convert(::Type{Ptr{T}}, buf::HostBuffer) where {T} =
    convert(Ptr{T}, pointer(buf))
Base.convert(::Type{ZePtr{T}}, buf::HostBuffer) where {T} =
    reinterpret(ZePtr{T}, pointer(buf))


## shared buffer

"""
    SharedBuffer

A managed buffer that is shared between the host and one or more devices.
"""
struct SharedBuffer <: AbstractBuffer
    ptr::ZePtr{Cvoid}
    bytesize::Int
    context::ZeContext
    device::Union{Nothing,ZeDevice}
end

# Allocate `bytesize` bytes of shared (migratable) memory, optionally preferring `dev`.
function shared_alloc(ctx::ZeContext, dev::Union{Nothing,ZeDevice}, bytesize::Integer,
                      alignment::Integer=1; host_flags=0, device_flags=0,
                      ordinal::Integer=0)
    relaxed_limits = Ref(ze_relaxed_allocation_limits_exp_desc_t(;
        flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE,
    ))
    GC.@preserve relaxed_limits begin
        # oversized device-bound requests carry the relaxed-limits extension in pNext
        device_desc = if dev !== nothing && bytesize > properties(dev).maxMemAllocSize
            pNext = Base.unsafe_convert(Ptr{Cvoid}, relaxed_limits)
            Ref(ze_device_mem_alloc_desc_t(; flags=device_flags, ordinal, pNext))
        else
            Ref(ze_device_mem_alloc_desc_t(; flags=device_flags, ordinal))
        end
        host_desc = Ref(ze_host_mem_alloc_desc_t(; flags=host_flags))
        out_ptr = Ref{Ptr{Cvoid}}()
        zeMemAllocShared(ctx, device_desc, host_desc, bytesize, alignment,
                         something(dev, C_NULL), out_ptr)
        return SharedBuffer(reinterpret(ZePtr{Cvoid}, out_ptr[]), bytesize, ctx, dev)
    end
end

Base.pointer(buf::SharedBuffer) = buf.ptr
Base.sizeof(buf::SharedBuffer) = buf.bytesize
context(buf::SharedBuffer) = buf.context
device(buf::SharedBuffer) = buf.device

Base.show(io::IO, buf::SharedBuffer) =
    @printf(io, "SharedBuffer(%s at %p)", Base.format_bytes(sizeof(buf)), Int(pointer(buf)))

Base.convert(::Type{Ptr{T}}, buf::SharedBuffer) where {T} =
    convert(Ptr{T}, reinterpret(Ptr{Cvoid}, pointer(buf)))
Base.convert(::Type{ZePtr{T}}, buf::SharedBuffer) where {T} =
    convert(ZePtr{T}, pointer(buf))


## properties

# Query the driver for the allocation properties (memory type, id, owning device)
# of the allocation containing `buf`.
function properties(buf::AbstractBuffer)
    props_ref = Ref(ze_memory_allocation_properties_t())
    dev_ref = Ref(ze_device_handle_t())
    zeMemGetAllocProperties(buf.context, pointer(buf), props_ref, dev_ref)
    props = props_ref[]
    return (
        device=ZeDevice(dev_ref[], buf.context.driver),
        type=props.type,
        id=props.id,
    )
end

# Placeholder buffer type for allocations whose kind has not been identified yet.
struct UnknownBuffer <: AbstractBuffer
    ptr::Ptr{Cvoid}
    bytesize::Int
    context::ZeContext
end

Base.pointer(buf::UnknownBuffer) =
buf.ptr
Base.sizeof(buf::UnknownBuffer) = buf.bytesize
context(buf::UnknownBuffer) = buf.context

Base.show(io::IO, buf::UnknownBuffer) =
    @printf(io, "UnknownBuffer(%s at %p)", Base.format_bytes(sizeof(buf)), Int(pointer(buf)))

# Resolve the allocation containing `ptr` to a typed buffer object by asking the
# driver for its base address, size and memory type.
function lookup_alloc(ctx::ZeContext, ptr::Union{Ptr,ZePtr})
    base_ref = Ref{Ptr{Cvoid}}()
    bytesize_ref = Ref{Csize_t}()
    zeMemGetAddressRange(ctx, ptr, base_ref, bytesize_ref)
    buf = UnknownBuffer(base_ref[], bytesize_ref[], ctx)
    props = properties(buf)
    return if props.type == ZE_MEMORY_TYPE_HOST
        HostBuffer(pointer(buf), sizeof(buf), ctx)
    elseif props.type == ZE_MEMORY_TYPE_DEVICE
        DeviceBuffer(reinterpret(ZePtr{Cvoid}, pointer(buf)), sizeof(buf), ctx, props.device)
    elseif props.type == ZE_MEMORY_TYPE_SHARED
        SharedBuffer(reinterpret(ZePtr{Cvoid}, pointer(buf)), sizeof(buf), ctx, props.device)
    else
        buf
    end
end


================================================
FILE: lib/level-zero/module.jl
================================================

export ZeModule

mutable struct ZeModule
    handle::ze_module_handle_t
    context::ZeContext
    device::ZeDevice

    # Compile a SPIR-V `image` into a module for `dev`, optionally capturing the
    # build log so failures can be reported with the compiler's diagnostics.
    function ZeModule(ctx::ZeContext, dev::ZeDevice, image; build_flags="", log::Bool=true)
        log_ref = log ? Ref{ze_module_build_log_handle_t}() : C_NULL
        constants = Ref(ze_module_constants_t(0, C_NULL, C_NULL))

        # compile the module
        GC.@preserve image build_flags constants begin
            desc_ref = Ref(ze_module_desc_t(;
                format=ZE_MODULE_FORMAT_IL_SPIRV,
                inputSize=sizeof(image),
                pInputModule=pointer(image),
                pBuildFlags=pointer(build_flags),
                pConstants=Base.unsafe_convert(Ptr{ze_module_constants_t}, constants)
            ))
            handle_ref = Ref{ze_module_handle_t}()
            res = unchecked_zeModuleCreate(ctx, dev, desc_ref, handle_ref, log_ref)
        end

        # read the log
        if log_ref !== C_NULL
            log_size_ref = Ref{Csize_t}(0)
            zeModuleBuildLogGetString(log_ref[], log_size_ref, C_NULL)
            log_buf = Vector{UInt8}(undef, log_size_ref[])
            zeModuleBuildLogGetString(log_ref[], log_size_ref, pointer(log_buf))
            zeModuleBuildLogDestroy(log_ref[])
            log = String(log_buf)[1:end-1] # strip null terminator
            if !isempty(log)
                if res == ZE_RESULT_ERROR_MODULE_BUILD_FAILURE
                    @error """Module compilation failed: $log"""
                else
                    @debug """Build log: $log"""
                end
            end
        end

        # only raise after the log has been surfaced
        if res != RESULT_SUCCESS
            throw_api_error(res)
        end

        obj = new(handle_ref[], ctx, dev)
        finalizer(obj) do obj
            zeModuleDestroy(obj)
        end
        return obj
    end
end

Base.unsafe_convert(::Type{ze_module_handle_t}, mod::ZeModule) = mod.handle

Base.:(==)(a::ZeModule, b::ZeModule) = a.handle == b.handle
Base.hash(e::ZeModule, h::UInt) = hash(e.handle, h)


## kernels

export ZeKernel

mutable struct ZeKernel
    mod::ZeModule
    handle::ze_kernel_handle_t

    # Look up the kernel named `name` inside `mod`.
    function ZeKernel(mod, name)
        GC.@preserve name begin
            desc_ref = Ref(ze_kernel_desc_t(; pKernelName=pointer(name)))
            handle_ref = Ref{ze_kernel_handle_t}()
            zeKernelCreate(mod, desc_ref, handle_ref)
        end
        obj = new(mod, handle_ref[])
        finalizer(obj) do obj
            zeKernelDestroy(obj)
        end
        obj
    end
end

Base.unsafe_convert(::Type{ze_kernel_handle_t}, kernel::ZeKernel) = kernel.handle

Base.:(==)(a::ZeKernel, b::ZeKernel) = a.handle == b.handle
Base.hash(e::ZeKernel, h::UInt) = hash(e.handle, h)


## kernel iteration

export kernels

# Lazy dictionary view over the kernels exported by a module; kernel objects are
# created on demand when iterated or looked up.
struct ZeModuleKernelDict <: AbstractDict{String,ZeKernel}
    mod::ZeModule
    names::Vector{String}

    function ZeModuleKernelDict(mod)
        count_ref = Ref{UInt32}(0)
        zeModuleGetKernelNames(mod, count_ref, C_NULL)
        names_ref = Vector{Ptr{Cchar}}(undef, count_ref[])
        zeModuleGetKernelNames(mod, count_ref, names_ref)
        new(mod, unsafe_string.(names_ref))
    end
end

kernels(mod::ZeModule) = ZeModuleKernelDict(mod)

function Base.iterate(dict::ZeModuleKernelDict, i=1)
    i > length(dict.names) && return nothing
    name = dict.names[i]
    kernel = ZeKernel(dict.mod, name)
    return (Pair{String,ZeKernel}(name, kernel), i+1)
end

Base.length(dict::ZeModuleKernelDict) = length(dict.names)

function Base.get(dict::ZeModuleKernelDict, name::AbstractString, def)
    in(name, dict.names) || return def
    ZeKernel(dict.mod, name)
end


## group sizes

export ZeDim, suggest_groupsize,
groupsize!

"""
    ZeDim3(x)
    ZeDim3((x,))
    ZeDim3((x, y))
    ZeDim3((x, y, z))

A type used to specify dimensions, consisting of 3 integers for respectively the `x`,
`y` and `z` dimension. Unspecified dimensions default to `1`.

Often accepted as argument through the `ZeDim` type alias, allowing to pass dimensions
as a plain integer or a tuple without having to construct an explicit `ZeDim3` object.
"""
struct ZeDim3
    x::Int
    y::Int
    z::Int
end

ZeDim3(dims::Integer)               = ZeDim3(dims, 1, 1)
ZeDim3(dims::NTuple{1,<:Integer})   = ZeDim3(dims[1], 1, 1)
ZeDim3(dims::NTuple{2,<:Integer})   = ZeDim3(dims[1], dims[2], 1)
ZeDim3(dims::NTuple{3,<:Integer})   = ZeDim3(dims[1], dims[2], dims[3])

# Type alias for conveniently specifying the dimensions
# (e.g. `(len, 2)` instead of `ZeDim3((len, 2))`)
const ZeDim = Union{Integer,
                    Tuple{Integer},
                    Tuple{Integer, Integer},
                    Tuple{Integer, Integer, Integer}}

# Ask the driver for a suitable work-group size for launching `kernel` over
# `global_sz` work items.
function suggest_groupsize(kernel::ZeKernel, global_sz::ZeDim)
    global_sz = ZeDim3(global_sz)
    group_sz_x = Ref{UInt32}()
    group_sz_y = Ref{UInt32}()
    group_sz_z = Ref{UInt32}()
    zeKernelSuggestGroupSize(kernel, global_sz.x, global_sz.y, global_sz.z,
                             group_sz_x, group_sz_y, group_sz_z)
    return ZeDim3(group_sz_x[], group_sz_y[], group_sz_z[])
end

# Set the work-group size used for subsequent launches of `kernel`.
function groupsize!(kernel::ZeKernel, sz::ZeDim)
    sz = ZeDim3(sz)
    zeKernelSetGroupSize(kernel, sz.x, sz.y, sz.z)
end


## arguments

export arguments

struct ZeKernelArgumentList
    kernel::ZeKernel
end

arguments(kernel::ZeKernel) = ZeKernelArgumentList(kernel)

# Bind `value` (which must be isbits) as kernel argument `index` (1-based).
function Base.setindex!(args::ZeKernelArgumentList, value::Any, index::Integer)
    @assert isbits(value)
    zeKernelSetArgumentValue(args.kernel, index-1, Core.sizeof(value),
                             Base.RefValue(value))
end


## attributes

export indirect_access, indirect_access!, source_attributes

function indirect_access(kernel::ZeKernel)
    flags_ref = Ref{ze_kernel_indirect_access_flags_t}()
    zeKernelGetIndirectAccess(kernel, flags_ref)
    return flags_ref[]
end

indirect_access!(kernel::ZeKernel, flags) = zeKernelSetIndirectAccess(kernel, flags)

# Return the space-separated source-level attributes of `kernel` as a vector of
# substrings.
function source_attributes(kernel::ZeKernel)
    size_ref = Ref{UInt32}(0)
    zeKernelGetSourceAttributes(kernel, size_ref, C_NULL)
    data = Vector{UInt8}(undef, size_ref[])
    ptr_ref = Ref{Ptr{Cchar}}(pointer(data))
    zeKernelGetSourceAttributes(kernel, size_ref, ptr_ref)
    str = String(data)
    # the attribute string is null-terminated, with attributes separated by space
    return split(str[1:end-1])
end


## properties

export properties

# Query kernel properties, chaining in the preferred-group-size extension and, when
# the driver supports it, the max-group-size extension.
function properties(kernel::ZeKernel)
    props_ref = Ref(ze_kernel_properties_t())
    preferred_group_size_props_ref = Ref(ze_kernel_preferred_group_size_properties_t())
    link_extensions(props_ref, preferred_group_size_props_ref)
    if haskey(oneL0.extension_properties(kernel.mod.context.driver),
              "ZE_extension_kernel_max_group_size_properties")
        # TODO: memoize
        max_group_size_props_ref = Ref(ze_kernel_max_group_size_properties_ext_t())
        link_extensions(preferred_group_size_props_ref, max_group_size_props_ref)
    else
        max_group_size_props_ref = nothing
    end
    zeKernelGetProperties(kernel, props_ref)
    props = props_ref[]
    return (
        numKernelArgs=Int(props.numKernelArgs),
        requiredGroupSize=ZeDim3(props.requiredGroupSizeX, props.requiredGroupSizeY,
                                 props.requiredGroupSizeZ),
        requiredNumSubGroups=Int(props.requiredNumSubGroups),
        requiredSubgroupSize=Int(props.requiredSubgroupSize),
        maxSubgroupSize=Int(props.maxSubgroupSize),
        maxNumSubgroups=Int(props.maxNumSubgroups),
        localMemSize=Int(props.localMemSize),
        privateMemSize=Int(props.privateMemSize),
        spillMemSize=Int(props.spillMemSize),
        kernel_uuid=Base.UUID(reinterpret(UInt128, [props.uuid.kid...])[1]),
        module_uuid=Base.UUID(reinterpret(UInt128, [props.uuid.mid...])[1]),
        preferredGroupSize=Int(preferred_group_size_props_ref[].preferredMultiple),
        maxGroupSize=max_group_size_props_ref === nothing ? missing :
                     Int(max_group_size_props_ref[].maxGroupSize)
    )
end


## execution

export append_launch!

ze_group_count_t(dim::ZeDim3) = ze_group_count_t(dim.x, dim.y, dim.z)

# Enqueue a launch of `kernel` on command list `list`, with `group_count` work
# groups, optionally signalling/waiting on events.
function append_launch!(list, kernel, group_count::ZeDim, signal_event=nothing,
                        wait_events::ZeEvent...)
    group_count = ze_group_count_t(ZeDim3(group_count))
    zeCommandListAppendLaunchKernel(list, kernel, Ref(group_count),
                                    something(signal_event, C_NULL),
                                    length(wait_events), [wait_events...])
end


================================================
FILE: lib/level-zero/oneL0.jl
================================================

module oneL0

using ..APIUtils

using CEnum

using Printf

using Libdl

if Sys.iswindows()
    const libze_loader = "ze_loader"
else
    using NEO_jll
    using oneAPI_Level_Zero_Loader_jll
end

include("utils.jl")
include("pointer.jl")

# core API
include("common.jl")
include("libze.jl")

# level zero's structure types are often assumed to be zero-initialized (`= {}` in C).
# Julia's memory is not, so define default constructors that ensure everything is zero.
#
# at the same time, our structs are immutable, so we can't just memset to 0 and set fields.
# so instead the constructor we generate has keyword arguments for every fields, and the
# default value for every field is set to 0 (can be overridden by defining `zeroinit`).
#
# TODO: is it really required to (1) zero-initialize memory and (2) set the stype field?
#       none of these are documented...
# TODO: add support for conveniently linking extension objects in pNext
for (structure_type_enum, _) in CEnum.name_value_pairs(ze_structure_type_t)
    structure_type_name = string(structure_type_enum)
    @assert startswith(structure_type_name, "ZE_STRUCTURE_TYPE")
    # derive the wrapped struct name from the enum, e.g.
    # ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES -> ze_device_properties_t
    T = Symbol("ze_" * lowercase(structure_type_name[19:end]) * "_t")
    if isdefined(oneL0, T)
        struct_typ = getfield(oneL0, T)
        args = Expr[]
        for field in fieldnames(struct_typ)
            field_type = fieldtype(struct_typ, field)
            # the stype field is fixed to the matching enum value; everything else
            # defaults to a zero-initialized value
            field_value = if field_type == ze_structure_type_t
                :($structure_type_enum)
            else
                :(zeroinit($field_type))
            end
            push!(args, Expr(:kw, field, field_value))
        end
        @eval begin
            @inline function $T(;$(args...))
                $(T)($(fieldnames(struct_typ)...))
            end
        end
    end
end
# alternative approach: make the structs mutable, and do memset + setfield

# Zero-valued defaults used by the generated keyword constructors above.
zeroinit(::Type{T}) where {T} = convert(T, 0)
zeroinit(::Type{T}) where {T<:CEnum.Cenum} = T(0)
zeroinit(::Type{T}) where {T<:NTuple} = ntuple(_->zeroinit(T.parameters[1]), length(T.parameters))
zeroinit(::Type{ze_driver_uuid_t}) = ze_driver_uuid_t(ntuple(_->zero(UInt8), 16))
zeroinit(::Type{ze_device_uuid_t}) = ze_device_uuid_t(ntuple(_->zero(UInt8), 16))
zeroinit(::Type{_ze_native_kernel_uuid_t}) = _ze_native_kernel_uuid_t(ntuple(_->zero(UInt8), 16))
zeroinit(::Type{ze_kernel_uuid_t}) =
    ze_kernel_uuid_t(ntuple(_->zero(UInt8), 16), ntuple(_->zero(UInt8), 16))

# link extension objects in pNext: each `refs[i+1]` is stored in the pNext field
# (always the second field of a Level Zero struct) of `refs[i]`.
function link_extensions(refs...)
    length(refs) >= 2 || return
    for (parent, child) in zip(refs[1:end-1], refs[2:end])
        pNext = Base.unsafe_convert(Ptr{Cvoid}, child)
        typ = eltype(parent)
        @assert fieldnames(typ)[2] == :pNext
        field = Base.unsafe_convert(Ptr{Cvoid}, parent) + fieldoffset(typ, 2)
        field = convert(Ptr{Ptr{Cvoid}}, field)
        unsafe_store!(field, pNext)
    end
    return
end

# core wrappers
include("error.jl")
include("driver.jl")
include("device.jl")

# Define OutOfGPUMemoryError after device.jl to ensure ZeDevice is available
export OutOfGPUMemoryError

"""
    OutOfGPUMemoryError(sz::Integer=0, dev::ZeDevice)

An operation allocated too much GPU memory.
"""
struct OutOfGPUMemoryError <: Exception
    sz::Int
    dev::Union{ZeDevice, Nothing}

    function OutOfGPUMemoryError(sz::Integer=0, dev::Union{ZeDevice, Nothing}=nothing)
        new(sz, dev)
    end
end

function Base.showerror(io::IO, err::OutOfGPUMemoryError)
    print(io, "Out of GPU memory")
    if err.sz > 0
        print(io, " trying to allocate $(Base.format_bytes(err.sz))")
    end
    if err.dev !== nothing
        # NOTE: these `print` calls were missing the `io` argument and wrote part of
        # the message to stdout instead of the stream passed to `showerror`.
        print(io, " on device $(properties(err.dev).name)")
        if length(memory_properties(err.dev)) == 1
            # XXX: how to handle multiple memories?
            print(io, " with $(Base.format_bytes(only(memory_properties(err.dev)).totalSize))")
        end
    end
    return io
end

include("context.jl")
include("cmdqueue.jl")
include("cmdlist.jl")
include("fence.jl")
include("event.jl")
include("barrier.jl")
include("module.jl")
include("memory.jl")
include("copy.jl")
include("residency.jl")

const functional = Ref{Bool}(false)
const validation_layer = Ref{Bool}()
const parameter_validation = Ref{Bool}()

function __init__()
    precompiling = ccall(:jl_generating_output, Cint, ()) != 0
    precompiling && return

    if Sys.iswindows()
        if Libdl.dlopen(libze_loader; throw_error=false) === nothing
            @error "The oneAPI Level Zero loader was not found. Please ensure the Intel GPU drivers are installed."
            return
        end
    else
        if !oneAPI_Level_Zero_Loader_jll.is_available()
            @error """No oneAPI Level Zero loader found for your platform.
                      Currently, only Linux x86 is supported. If you have a local oneAPI toolchain, you can use that; refer to the documentation for more details."""
            return
        end
        if !NEO_jll.is_available()
            @error """No oneAPI driver found for your platform.
                      Currently, only Linux x86_64 is supported. If you have a local oneAPI toolchain, you can use that; refer to the documentation for more details."""
            return
        end
    end

    try
        zeInit(0)
    catch err
        # Handle the specific case where no oneAPI device is available
        if err isa ZeError && err.code == RESULT_ERROR_UNINITIALIZED
            functional[] = false
            return
        end
        # For other errors, still report them as errors
        @error "Failed to initialize oneAPI" exception=(err,catch_backtrace())
        functional[] = false
        return
    end

    # Check if there are actually any drivers/devices available
    try
        drv_count = Ref{UInt32}(0)
        zeDriverGet(drv_count, C_NULL)
        if drv_count[] == 0
            @info "oneAPI initialized but no drivers found. oneAPI.jl will not be functional."
            functional[] = false
            return
        end
    catch err
        @error "Failed to enumerate oneAPI drivers" exception = (err, catch_backtrace())
        functional[] = false
        return
    end

    functional[] = true
    validation_layer[] = parse(Bool, get(ENV, "ZE_ENABLE_VALIDATION_LAYER", "false"))
    parameter_validation[] = parse(Bool, get(ENV, "ZE_ENABLE_PARAMETER_VALIDATION", "false"))
end

end


================================================
FILE: lib/level-zero/pointer.jl
================================================

# pointer types

export ZePtr, ZE_NULL, PtrOrZePtr, ZeRef, RefOrZeRef


#
# Device pointer
#

"""
    ZePtr{T}

A memory address that refers to data of type `T` that is accessible from a device. A
`ZePtr` is ABI compatible with regular `Ptr` objects, e.g. it can be used to `ccall` a
function that expects a `Ptr` to device memory, but it prevents erroneous conversions
between the two.
""" ZePtr if sizeof(Ptr{Cvoid}) == 8 primitive type ZePtr{T} 64 end else primitive type ZePtr{T} 32 end end # constructor ZePtr{T}(x::Union{Int,UInt,ZePtr}) where {T} = Base.bitcast(ZePtr{T}, x) const ZE_NULL = ZePtr{Cvoid}(0) ## getters Base.eltype(::Type{<:ZePtr{T}}) where {T} = T ## conversions # to and from integers ## pointer to integer Base.convert(::Type{T}, x::ZePtr) where {T<:Integer} = T(UInt(x)) ## integer to pointer Base.convert(::Type{ZePtr{T}}, x::Union{Int,UInt}) where {T} = ZePtr{T}(x) Base.Int(x::ZePtr) = Base.bitcast(Int, x) Base.UInt(x::ZePtr) = Base.bitcast(UInt, x) # between regular and oneAPI pointers Base.convert(::Type{<:Ptr}, p::ZePtr) = throw(ArgumentError("cannot convert a device pointer to a host pointer")) # between oneAPI pointers Base.convert(::Type{ZePtr{T}}, p::ZePtr) where {T} = Base.bitcast(ZePtr{T}, p) # defer conversions to unsafe_convert Base.cconvert(::Type{<:ZePtr}, x) = x # fallback for unsafe_convert Base.unsafe_convert(::Type{P}, x::ZePtr) where {P<:ZePtr} = convert(P, x) # from arrays Base.unsafe_convert(::Type{ZePtr{S}}, a::AbstractArray{T}) where {S,T} = convert(ZePtr{S}, Base.unsafe_convert(ZePtr{T}, a)) Base.unsafe_convert(::Type{ZePtr{T}}, a::AbstractArray{T}) where {T} = error("conversion to pointer not defined for $(typeof(a))") ## limited pointer arithmetic & comparison Base.isequal(x::ZePtr, y::ZePtr) = (x === y) Base.isless(x::ZePtr{T}, y::ZePtr{T}) where {T} = x < y Base.:(==)(x::ZePtr, y::ZePtr) = UInt(x) == UInt(y) Base.:(<)(x::ZePtr, y::ZePtr) = UInt(x) < UInt(y) Base.:(-)(x::ZePtr, y::ZePtr) = UInt(x) - UInt(y) Base.:(+)(x::ZePtr, y::Integer) = oftype(x, UInt(x) + (y % UInt) % UInt) Base.:(-)(x::ZePtr, y::Integer) = oftype(x, UInt(x) - (y % UInt) % UInt) Base.:(+)(x::Integer, y::ZePtr) = y + x # # Host or device pointer # """ PtrOrZePtr{T} A special pointer type, ABI-compatible with both `Ptr` and `ZePtr`, for use in `ccall` expressions to convert values to either a device or a host type (in that order). 
This is required for APIs which accept pointers that either point to host or device memory. """ PtrOrZePtr if sizeof(Ptr{Cvoid}) == 8 primitive type PtrOrZePtr{T} 64 end else primitive type PtrOrZePtr{T} 32 end end function Base.cconvert(::Type{PtrOrZePtr{T}}, val) where {T} # `cconvert` is always implemented for both `Ptr` and `ZePtr`, so pick the first result # that has done an actual conversion dev_val = Base.cconvert(ZePtr{T}, val) if dev_val !== val return dev_val end host_val = Base.cconvert(Ptr{T}, val) if host_val !== val return host_val end return val end function Base.unsafe_convert(::Type{PtrOrZePtr{T}}, val) where {T} ptr = if Core.Compiler.return_type(Base.unsafe_convert, Tuple{Type{Ptr{T}}, typeof(val)}) !== Union{} Base.unsafe_convert(Ptr{T}, val) elseif Core.Compiler.return_type(Base.unsafe_convert, Tuple{Type{ZePtr{T}}, typeof(val)}) !== Union{} Base.unsafe_convert(ZePtr{T}, val) else throw(ArgumentError("cannot convert to either a host or device pointer")) end return Base.bitcast(PtrOrZePtr{T}, ptr) end # # Device reference objects # if sizeof(Ptr{Cvoid}) == 8 primitive type ZeRef{T} 64 end else primitive type ZeRef{T} 32 end end # general methods for ZeRef{T} type Base.eltype(x::Type{<:ZeRef{T}}) where {T} = @isdefined(T) ? 
T : Any Base.convert(::Type{ZeRef{T}}, x::ZeRef{T}) where {T} = x # conversion or the actual ccall Base.unsafe_convert(::Type{ZeRef{T}}, x::ZeRef{T}) where {T} = Base.bitcast(ZeRef{T}, Base.unsafe_convert(ZePtr{T}, x)) Base.unsafe_convert(::Type{ZeRef{T}}, x) where {T} = Base.bitcast(ZeRef{T}, Base.unsafe_convert(ZePtr{T}, x)) # ZeRef from literal pointer Base.convert(::Type{ZeRef{T}}, x::ZePtr{T}) where {T} = x # indirect constructors using ZeRef Base.convert(::Type{ZeRef{T}}, x) where {T} = ZeRef{T}(x) ## ZeRef object backed by an array at index i struct ZeRefArray{T,A<:AbstractArray{T}} <: Ref{T} x::A i::Int ZeRefArray{T,A}(x,i) where {T,A<:AbstractArray{T}} = new(x,i) end ZeRefArray{T}(x::AbstractArray{T}, i::Int=1) where {T} = ZeRefArray{T,typeof(x)}(x, i) ZeRefArray(x::AbstractArray{T}, i::Int=1) where {T} = ZeRefArray{T}(x, i) Base.convert(::Type{ZeRef{T}}, x::AbstractArray{T}) where {T} = ZeRefArray(x, 1) function Base.unsafe_convert(P::Type{ZePtr{T}}, b::ZeRefArray{T}) where T return pointer(b.x, b.i) end function Base.unsafe_convert(P::Type{ZePtr{Any}}, b::ZeRefArray{Any}) return convert(P, pointer(b.x, b.i)) end Base.unsafe_convert(::Type{ZePtr{Cvoid}}, b::ZeRefArray{T}) where {T} = convert(ZePtr{Cvoid}, Base.unsafe_convert(ZePtr{T}, b)) ## Union with all ZeRef 'subtypes' const ZeRefs{T} = Union{ZePtr{T}, ZeRefArray{T}} ## RefOrZeRef if sizeof(Ptr{Cvoid}) == 8 primitive type RefOrZeRef{T} 64 end else primitive type RefOrZeRef{T} 32 end end Base.convert(::Type{RefOrZeRef{T}}, x::Union{RefOrZeRef{T}, Ref{T}, ZeRef{T}, ZeRefs{T}}) where {T} = x # prefer conversion to CPU ref: this is generally cheaper Base.convert(::Type{RefOrZeRef{T}}, x) where {T} = Ref{T}(x) Base.unsafe_convert(::Type{RefOrZeRef{T}}, x::Ref{T}) where {T} = Base.bitcast(RefOrZeRef{T}, Base.unsafe_convert(Ptr{T}, x)) Base.unsafe_convert(::Type{RefOrZeRef{T}}, x) where {T} = Base.bitcast(RefOrZeRef{T}, Base.unsafe_convert(Ptr{T}, x)) # support conversion from GPU ref 
Base.unsafe_convert(::Type{RefOrZeRef{T}}, x::ZeRefs{T}) where {T} =
    Base.bitcast(RefOrZeRef{T}, Base.unsafe_convert(ZePtr{T}, x))

# support conversion from arrays
Base.convert(::Type{RefOrZeRef{T}}, x::Array{T}) where {T} = convert(Ref{T}, x)
Base.convert(::Type{RefOrZeRef{T}}, x::AbstractArray{T}) where {T} = convert(ZeRef{T}, x)
Base.unsafe_convert(P::Type{RefOrZeRef{T}}, b::ZeRefArray{T}) where T =
    Base.bitcast(RefOrZeRef{T}, Base.unsafe_convert(ZeRef{T}, b))


================================================
FILE: lib/level-zero/residency.jl
================================================

export make_resident, evict

## memory

# Make the allocation backing `buf` resident on `dev` so indirect accesses succeed.
function make_resident(ctx::ZeContext, dev::ZeDevice, buf::AbstractBuffer, size=sizeof(buf))
    zeContextMakeMemoryResident(ctx, dev, buf, size)
end

# Undo `make_resident`, allowing the driver to page the allocation out again.
function evict(ctx::ZeContext, dev::ZeDevice, buf::AbstractBuffer, size=sizeof(buf))
    zeContextEvictMemory(ctx, dev, buf, size)
end


================================================
FILE: lib/level-zero/utils.jl
================================================

isdebug(group) =
    Base.CoreLogging.current_logger_for_env(Base.CoreLogging.Debug, group, oneL0) !== nothing

# Registered callbacks invoked during memory reclamation (e.g., flushing deferred MKL
# sparse handle releases). Extensions like oneMKL can register cleanup functions here
# so they run when Level Zero reports OOM or when proactive GC fires.
const _reclaim_callbacks = Function[]

function register_reclaim_callback!(f::Function)
    return push!(_reclaim_callbacks, f)
end

# Invoke every registered reclaim callback, swallowing individual failures so one
# broken callback cannot prevent the others from running.
function _run_reclaim_callbacks()
    for cb in _reclaim_callbacks
        try
            cb()
        catch
        end
    end
    return
end

# Run `f()` and, as long as `isfailed` deems the result a failure, retry after
# increasingly aggressive memory-reclamation phases: quick GC, full GC, then the
# registered reclaim callbacks followed by another full GC.
function retry_reclaim(f, isfailed)
    ret = f()

    # slow path, incrementally reclaiming more memory until we succeed
    if isfailed(ret)
        phase = 1
        while true
            if phase == 1
                GC.gc(false)
            elseif phase == 2
                GC.gc(true)
            elseif phase == 3
                # After GC, finalizers may have deferred resource releases (e.g., MKL
                # sparse handles). Flush them now, then GC again to free the memory
                # those releases made available.
                _run_reclaim_callbacks()
                GC.gc(true)
            else
                break
            end
            phase += 1

            ret = f()
            isfailed(ret) || break
        end
    end

    ret
end


================================================
FILE: lib/mkl/array.jl
================================================

export oneSparseMatrixCSR, oneSparseMatrixCSC, oneSparseMatrixCOO

abstract type oneAbstractSparseArray{Tv, Ti, N} <: AbstractSparseArray{Tv, Ti, N} end
const oneAbstractSparseVector{Tv, Ti} = oneAbstractSparseArray{Tv, Ti, 1}
const oneAbstractSparseMatrix{Tv, Ti} = oneAbstractSparseArray{Tv, Ti, 2}

mutable struct oneSparseMatrixCSR{Tv, Ti} <: oneAbstractSparseMatrix{Tv, Ti}
    handle::Union{Nothing, matrix_handle_t}
    rowPtr::oneVector{Ti}
    colVal::oneVector{Ti}
    nzVal::oneVector{Tv}
    dims::NTuple{2,Int}
    nnz::Ti
end

mutable struct oneSparseMatrixCSC{Tv, Ti} <: oneAbstractSparseMatrix{Tv, Ti}
    handle::Union{Nothing, matrix_handle_t}
    colPtr::oneVector{Ti}
    rowVal::oneVector{Ti}
    nzVal::oneVector{Tv}
    dims::NTuple{2,Int}
    nnz::Ti
end

mutable struct oneSparseMatrixCOO{Tv, Ti} <: oneAbstractSparseMatrix{Tv, Ti}
    handle::Union{Nothing, matrix_handle_t}
    rowInd::oneVector{Ti}
    colInd::oneVector{Ti}
    nzVal::oneVector{Tv}
    dims::NTuple{2,Int}
    nnz::Ti
end

Base.length(A::oneAbstractSparseMatrix) = prod(A.dims)
Base.size(A::oneAbstractSparseMatrix) = A.dims

function Base.size(A::oneAbstractSparseMatrix, d::Integer)
    d == 1 || d == 2 || throw(ArgumentError("dimension must be 1 or 2, got $d"))
    return A.dims[d]
end

SparseArrays.nnz(A::oneAbstractSparseMatrix) = A.nnz
SparseArrays.nonzeros(A::oneAbstractSparseMatrix) = A.nzVal

# Display GPU sparse matrices by converting to a CPU SparseMatrixCSC first.
for (gpu, cpu) in [:oneSparseMatrixCSR => :SparseMatrixCSC,
                   :oneSparseMatrixCSC => :SparseMatrixCSC,
                   :oneSparseMatrixCOO => :SparseMatrixCSC]
    @eval Base.show(io::IOContext, x::$gpu) = show(io, $cpu(x))

    @eval function Base.show(io::IO, mime::MIME"text/plain", S::$gpu)
        xnnz = nnz(S)
        m, n = size(S)
        print(io, m, "×", n, " ", typeof(S), " with ", xnnz, " stored ",
xnnz == 1 ? "entry" : "entries") if !(m == 0 || n == 0) println(io, ":") io = IOContext(io, :typeinfo => eltype(S)) if ndims(S) == 1 show(io, $cpu(S)) else # so that we get the nice Braille pattern Base.print_array(io, $cpu(S)) end end end end ================================================ FILE: lib/mkl/fft.jl ================================================ # oneMKL FFT (DFT) high-level Julia interface # Inspired by AMDGPU ROCFFT interface style, adapted to oneMKL DFT C wrapper. module FFT using ..oneMKL using ..oneMKL: oneAPI, SYCL, syclQueue_t using ..Support using ..SYCL using LinearAlgebra using GPUArrays using AbstractFFTs import AbstractFFTs: complexfloat, realfloat import AbstractFFTs: plan_fft, plan_fft!, plan_bfft, plan_bfft! import AbstractFFTs: plan_rfft, plan_brfft, plan_inv, normalization, ScaledPlan import AbstractFFTs: fft, bfft, ifft, rfft, Plan, ScaledPlan export MKLFFTPlan # Import DFT enums and constants from Support module using ..Support # Allow implicit conversion of SYCL queue object to raw handle when storing/passing Base.convert(::Type{syclQueue_t}, q::SYCL.syclQueue) = Base.unsafe_convert(syclQueue_t, q) abstract type MKLFFTPlan{T,K,inplace} <: AbstractFFTs.Plan{T} end Base.eltype(::MKLFFTPlan{T}) where T = T is_inplace(::MKLFFTPlan{<:Any,<:Any,inplace}) where inplace = inplace # Forward / inverse flags const MKLFFT_FORWARD = true const MKLFFT_INVERSE = false mutable struct cMKLFFTPlan{T,K,inplace,N,R,B} <: MKLFFTPlan{T,K,inplace} handle::onemklDftDescriptor_t queue::syclQueue_t sz::NTuple{N,Int} osz::NTuple{N,Int} realdomain::Bool region::NTuple{R,Int} buffer::B pinv::Any end # Real transforms use separate struct (mirroring AMDGPU style) for buffer staging mutable struct rMKLFFTPlan{T,K,inplace,N,R,B} <: MKLFFTPlan{T,K,inplace} handle::onemklDftDescriptor_t queue::syclQueue_t sz::NTuple{N,Int} osz::NTuple{N,Int} xtype::Symbol region::NTuple{R,Int} buffer::B pinv::Any end # Inverse plan constructors (derive from existing plan) function 
normalization_factor(sz, region)
    # AbstractFFTs expects inverse to scale by 1/prod(lengths along region)
    prod(ntuple(i-> sz[region[i]], length(region)))
end

# Derive the inverse plan from a forward complex plan.
# NOTE(review): the derived plan reuses the same descriptor handle and the same
# sz/osz as the parent — confirm the shared handle is valid for both directions.
function plan_inv(p::cMKLFFTPlan{T,MKLFFT_FORWARD,inplace,N,R,B}) where {T,inplace,N,R,B}
    q = cMKLFFTPlan{T,MKLFFT_INVERSE,inplace,N,R,B}(p.handle,p.queue,p.sz,p.osz,p.realdomain,p.region,p.buffer,p)
    p.pinv = q
    ScaledPlan(q, 1/normalization_factor(p.sz, p.region))
end

# Derive the forward plan from an inverse complex plan.
function plan_inv(p::cMKLFFTPlan{T,MKLFFT_INVERSE,inplace,N,R,B}) where {T,inplace,N,R,B}
    q = cMKLFFTPlan{T,MKLFFT_FORWARD,inplace,N,R,B}(p.handle,p.queue,p.sz,p.osz,p.realdomain,p.region,p.buffer,p)
    p.pinv = q
    ScaledPlan(q, 1/normalization_factor(p.sz, p.region))
end

# Derive the inverse (brfft) plan from a real forward plan.
# NOTE(review): sz/osz are passed through unswapped — confirm intended for the
# size-asymmetric real transforms.
function plan_inv(p::rMKLFFTPlan{T,MKLFFT_FORWARD,inplace,N,R,B}) where {T,inplace,N,R,B}
    q = rMKLFFTPlan{T,MKLFFT_INVERSE,inplace,N,R,B}(p.handle,p.queue,p.sz,p.osz,:brfft,p.region,p.buffer,p)
    p.pinv = q
    ScaledPlan(q, 1/normalization_factor(p.sz, p.region))
end

# Derive the forward (rfft) plan from a real inverse plan.
function plan_inv(p::rMKLFFTPlan{T,MKLFFT_INVERSE,inplace,N,R,B}) where {T,inplace,N,R,B}
    q = rMKLFFTPlan{T,MKLFFT_FORWARD,inplace,N,R,B}(p.handle,p.queue,p.sz,p.osz,:rfft,p.region,p.buffer,p)
    p.pinv = q
    ScaledPlan(q, 1/normalization_factor(p.sz, p.region))
end

# Human-readable plan summary, e.g. "oneMKL FFT forward plan for 4×4 oneArray of ComplexF32".
function Base.show(io::IO, p::MKLFFTPlan{T,K,inplace}) where {T,K,inplace}
    print(io, inplace ? "oneMKL FFT in-place " : "oneMKL FFT ",
          K ? "forward" : "inverse", " plan for ")
    if isempty(p.sz); print(io, "0-dimensional")
    else print(io, join(p.sz, "×"))
    end
    print(io, " oneArray of ", T)
end

# Plan constructors

# Create an (uncommitted) oneMKL DFT descriptor for `sz`-sized transforms of
# element type `T` (complex domain when `complex` is true), together with a
# SYCL queue derived from the current oneAPI context/device.
function _create_descriptor(sz::NTuple{N,Int}, T::Type, complex::Bool) where {N}
    prec = T<:Float64 || T<:ComplexF64 ? ONEMKL_DFT_PRECISION_DOUBLE : ONEMKL_DFT_PRECISION_SINGLE
    dom = complex ? ONEMKL_DFT_DOMAIN_COMPLEX : ONEMKL_DFT_DOMAIN_REAL
    desc_ref = Ref{onemklDftDescriptor_t}()
    # Create descriptor for the full array dimensions
    lengths = collect(Int64, sz)
    st = length(lengths) == 1 ?
        onemklDftCreate1D(desc_ref, prec, dom, lengths[1]) :
        onemklDftCreateND(desc_ref, prec, dom, length(lengths), pointer(lengths))
    st == 0 || error("onemkl DFT create failed (status $st)")
    desc = desc_ref[]
    # Do not program descriptor scaling; we'll perform inverse normalization manually.
    # Set placement explicitly based on plan type later
    # Construct a SYCL queue from current Level Zero context/device (reuse global queue)
    ze_ctx = oneAPI.context(); ze_dev = oneAPI.device()
    sycl_dev = SYCL.syclDevice(SYCL.syclPlatform(oneAPI.driver()), ze_dev)
    sycl_ctx = SYCL.syclContext([sycl_dev], ze_ctx)
    q = SYCL.syclQueue(sycl_ctx, sycl_dev, oneAPI.global_queue(ze_ctx, ze_dev))
    return desc, q
end

# Complex plans

# Out-of-place forward complex plan over all dimensions of X.
function plan_fft(X::oneAPI.oneArray{T,N}, region) where {T<:Union{ComplexF32,ComplexF64},N}
    R = length(region); reg = NTuple{R,Int}(region)
    # For now, only support full transforms (all dimensions)
    if reg != ntuple(identity, N)
        error("Partial dimension FFT not yet supported. Region $reg must be $(ntuple(identity, N))")
    end
    desc, q = _create_descriptor(size(X), T, true)
    onemklDftSetValueConfigValue(desc, ONEMKL_DFT_PARAM_PLACEMENT, ONEMKL_DFT_VALUE_NOT_INPLACE)
    if N > 1
        # Column-major strides: stride along dimension i is product of sizes of previous dims
        strides = Vector{Int64}(undef, N+1); strides[1]=0
        prod = 1
        @inbounds for i in 1:N
            strides[i+1] = prod
            prod *= size(X,i)
        end
        onemklDftSetValueInt64Array(desc, ONEMKL_DFT_PARAM_FWD_STRIDES, pointer(strides), length(strides))
        onemklDftSetValueInt64Array(desc, ONEMKL_DFT_PARAM_BWD_STRIDES, pointer(strides), length(strides))
    end
    stc = onemklDftCommit(desc, q); stc == 0 || error("commit failed ($stc)")
    return cMKLFFTPlan{T,MKLFFT_FORWARD,false,N,R,Nothing}(desc,q,size(X),size(X),false,reg,nothing,nothing)
end

# Out-of-place unnormalized inverse (backward) complex plan.
function plan_bfft(X::oneAPI.oneArray{T,N}, region) where {T<:Union{ComplexF32,ComplexF64},N}
    R = length(region); reg = NTuple{R,Int}(region)
    # For now, only support full transforms (all dimensions)
    if reg != ntuple(identity, N)
        error("Partial dimension FFT not yet supported. Region $reg must be $(ntuple(identity, N))")
    end
    desc, q = _create_descriptor(size(X), T, true)
    onemklDftSetValueConfigValue(desc, ONEMKL_DFT_PARAM_PLACEMENT, ONEMKL_DFT_VALUE_NOT_INPLACE)
    if N > 1
        strides = Vector{Int64}(undef, N+1); strides[1]=0; prod=1
        @inbounds for i in 1:N
            strides[i+1]=prod; prod*=size(X,i)
        end
        onemklDftSetValueInt64Array(desc, ONEMKL_DFT_PARAM_FWD_STRIDES, pointer(strides), length(strides))
        onemklDftSetValueInt64Array(desc, ONEMKL_DFT_PARAM_BWD_STRIDES, pointer(strides), length(strides))
    end
    stc = onemklDftCommit(desc, q); stc == 0 || error("commit failed ($stc)")
    return cMKLFFTPlan{T,MKLFFT_INVERSE,false,N,R,Nothing}(desc,q,size(X),size(X),false,reg,nothing,nothing)
end

# In-place (provide separate methods)

# In-place forward complex plan (descriptor programmed with INPLACE placement).
function plan_fft!(X::oneAPI.oneArray{T,N}, region) where {T<:Union{ComplexF32,ComplexF64},N}
    R = length(region); reg = NTuple{R,Int}(region)
    # For now, only support full transforms (all dimensions)
    if reg != ntuple(identity, N)
        error("Partial dimension FFT not yet supported. Region $reg must be $(ntuple(identity, N))")
    end
    desc,q = _create_descriptor(size(X),T,true)
    onemklDftSetValueConfigValue(desc, ONEMKL_DFT_PARAM_PLACEMENT, ONEMKL_DFT_VALUE_INPLACE)
    if N > 1
        strides = Vector{Int64}(undef, N+1); strides[1]=0; prod=1
        @inbounds for i in 1:N
            strides[i+1]=prod; prod*=size(X,i)
        end
        onemklDftSetValueInt64Array(desc, ONEMKL_DFT_PARAM_FWD_STRIDES, pointer(strides), length(strides))
        onemklDftSetValueInt64Array(desc, ONEMKL_DFT_PARAM_BWD_STRIDES, pointer(strides), length(strides))
    end
    stc = onemklDftCommit(desc, q); stc == 0 || error("commit failed ($stc)")
    cMKLFFTPlan{T,MKLFFT_FORWARD,true,N,R,Nothing}(desc,q,size(X),size(X),false,reg,nothing,nothing)
end

# In-place unnormalized inverse complex plan.
function plan_bfft!(X::oneAPI.oneArray{T,N}, region) where {T<:Union{ComplexF32,ComplexF64},N}
    R = length(region); reg = NTuple{R,Int}(region)
    # For now, only support full transforms (all dimensions)
    if reg != ntuple(identity, N)
        error("Partial dimension FFT not yet supported. Region $reg must be $(ntuple(identity, N))")
    end
    desc,q = _create_descriptor(size(X),T,true)
    onemklDftSetValueConfigValue(desc, ONEMKL_DFT_PARAM_PLACEMENT, ONEMKL_DFT_VALUE_INPLACE)
    if N > 1
        strides = Vector{Int64}(undef, N+1); strides[1]=0; prod=1
        @inbounds for i in 1:N
            strides[i+1]=prod; prod*=size(X,i)
        end
        onemklDftSetValueInt64Array(desc, ONEMKL_DFT_PARAM_FWD_STRIDES, pointer(strides), length(strides))
        onemklDftSetValueInt64Array(desc, ONEMKL_DFT_PARAM_BWD_STRIDES, pointer(strides), length(strides))
    end
    stc = onemklDftCommit(desc, q); stc == 0 || error("commit failed ($stc)")
    cMKLFFTPlan{T,MKLFFT_INVERSE,true,N,R,Nothing}(desc,q,size(X),size(X),false,reg,nothing,nothing)
end

# Real input methods - convert to complex like FFTW does

# Forward plan for real input: plan against a same-size complex array.
function plan_fft(X::oneAPI.oneArray{T,N}, region) where {T<:Union{Float32,Float64},N}
    CT = Complex{T}
    # Create a complex plan by converting the real array to complex
    X_complex = oneAPI.oneArray{CT}(undef, size(X))
    plan_fft(X_complex, region)
end

function
plan_bfft(X::oneAPI.oneArray{T,N}, region) where {T<:Union{Float32,Float64},N}
    CT = Complex{T}
    # Create a complex plan by converting the real array to complex
    X_complex = oneAPI.oneArray{CT}(undef, size(X))
    plan_bfft(X_complex, region)
end

# In-place transforms of real arrays are rejected: the complex result cannot
# be stored back into the real input.
function plan_fft!(X::oneAPI.oneArray{T,N}, region) where {T<:Union{Float32,Float64},N}
    error("In-place FFT not supported for real input arrays. Use plan_fft instead.")
end

function plan_bfft!(X::oneAPI.oneArray{T,N}, region) where {T<:Union{Float32,Float64},N}
    error("In-place FFT not supported for real input arrays. Use plan_bfft instead.")
end

# Real forward (out-of-place) - supports multi-dimensional transforms
function plan_rfft(X::oneAPI.oneArray{T,N}, region) where {T<:Union{Float32,Float64},N}
    # Convert region to tuple if it's a range
    if isa(region, AbstractUnitRange)
        region = tuple(region...)
    end
    R = length(region); reg = NTuple{R,Int}(region)
    # For single dimension transforms, use the optimized oneMKL real FFT
    if R == 1 && reg[1] == 1
        # Only support transform along first dimension for 1D case
        return _plan_rfft_1d(X, reg)
    end
    # For multi-dimensional transforms, use complex FFT approach
    # This is mathematically equivalent and works around oneMKL limitations
    return _plan_rfft_nd(X, reg)
end

# Single-dimension real FFT using oneMKL (optimized path).
# Transforms along dimension 1 only; other dimensions are batched.
function _plan_rfft_1d(X::oneAPI.oneArray{T,N}, reg::NTuple{1,Int}) where {T<:Union{Float32,Float64},N}
    # Create 1D descriptor for the transform dimension
    desc,q = _create_descriptor((size(X, reg[1]),), T, false)
    xdims = size(X)
    # output along first dim becomes N/2+1
    ydims = Base.setindex(xdims, div(xdims[1],2)+1, 1)
    buffer = oneAPI.oneArray{Complex{T}}(undef, ydims)
    onemklDftSetValueConfigValue(desc, ONEMKL_DFT_PARAM_PLACEMENT, ONEMKL_DFT_VALUE_NOT_INPLACE)
    # Set up for batched 1D transforms along first dimension
    if N > 1
        # Number of 1D transforms = product of all other dimensions
        num_transforms = prod(xdims[2:end])
        onemklDftSetValueInt64(desc, ONEMKL_DFT_PARAM_NUMBER_OF_TRANSFORMS, Int64(num_transforms))
        # Distance between consecutive transforms (stride along batching dimension)
        onemklDftSetValueInt64(desc, ONEMKL_DFT_PARAM_FWD_DISTANCE, Int64(xdims[1]))
        onemklDftSetValueInt64(desc, ONEMKL_DFT_PARAM_BWD_DISTANCE, Int64(ydims[1]))
    end
    stc = onemklDftCommit(desc, q); stc == 0 || error("commit failed ($stc)")
    R = length(reg)
    rMKLFFTPlan{T,MKLFFT_FORWARD,false,N,R,typeof(buffer)}(desc,q,xdims,ydims,:rfft,reg,buffer,nothing)
end

# Multi-dimensional real FFT using complex FFT approach: wraps a full complex
# forward plan and truncates the result using conjugate symmetry.
struct ComplexBasedRealFFTPlan{T,N,R} <: MKLFFTPlan{T,MKLFFT_FORWARD,false}
    complex_plan::cMKLFFTPlan{Complex{T},MKLFFT_FORWARD,false,N,R,Nothing}
    sz::NTuple{N,Int}
    osz::NTuple{N,Int}
    region::NTuple{R,Int}
end

function _plan_rfft_nd(X::oneAPI.oneArray{T,N}, reg::NTuple{R,Int}) where {T<:Union{Float32,Float64},N,R}
    # Create complex version for planning
    X_complex = oneAPI.oneArray{Complex{T}}(undef, size(X))
    complex_plan = plan_fft(X_complex, reg)
    # Calculate output dimensions (real FFT output size)
    xdims = size(X)
    ydims = ntuple(N) do i
        if i in reg && i == minimum(reg)
            # First dimension in region gets reduced
            div(xdims[i], 2) + 1
        else
            xdims[i]
        end
    end
    ComplexBasedRealFFTPlan{T,N,R}(complex_plan, xdims, ydims, reg)
end

# Show method for complex-based plan
function Base.show(io::IO, p::ComplexBasedRealFFTPlan{T}) where {T}
    print(io, "oneMKL FFT forward plan for ")
    if isempty(p.sz); print(io, "0-dimensional")
    else print(io, join(p.sz, "×"))
    end
    print(io, " oneArray of ", T, " (multi-dimensional via complex FFT)")
end

# Execution for complex-based real FFT plan
function Base.:*(p::ComplexBasedRealFFTPlan{T,N,R}, X::oneAPI.oneArray{T}) where {T,N,R}
    # Convert to complex
    X_complex = Complex{T}.(X)
    # Perform complex FFT
    Y_complex = p.complex_plan * X_complex
    # Extract appropriate portion for real FFT result
    # For real FFT, we only need roughly half the output due to conjugate symmetry
    indices = ntuple(N) do i
        if i in p.region && i == minimum(p.region)
            # First dimension in region: take 1:(N÷2+1)
            1:(div(p.sz[i], 2) + 1)
        else
            # Other dimensions: take all
            1:p.sz[i]
        end
    end
    Y = Y_complex[indices...]
    return Y
end

# Real inverse (complex->real) requires complex input shape - supports multi-dimensional transforms
function plan_brfft(X::oneAPI.oneArray{T,N}, d::Integer, region) where {T<:Union{ComplexF32,ComplexF64},N}
    # Convert region to tuple if it's a range
    if isa(region, AbstractUnitRange)
        region = tuple(region...)
    end
    R = length(region); reg = NTuple{R,Int}(region)
    # For single dimension transforms along first dim, use optimized oneMKL path
    if R == 1 && reg[1] == 1
        return _plan_brfft_1d(X, d, reg)
    end
    # For multi-dimensional transforms, use complex FFT approach
    return _plan_brfft_nd(X, d, reg)
end

# Single-dimension real inverse FFT using oneMKL (optimized path).
# `d` is the length of the real output along dimension 1.
function _plan_brfft_1d(X::oneAPI.oneArray{T,N}, d::Integer, reg::NTuple{1,Int}) where {T<:Union{ComplexF32,ComplexF64},N}
    # Extract underlying real type R from Complex{R}
    # NOTE(review): `real(T)` would be the idiomatic spelling of this.
    @assert T <: Complex
    RT = T.parameters[1]
    # Create 1D descriptor for the transform dimension
    desc,q = _create_descriptor((d,), RT, false)
    xdims = size(X)
    ydims = Base.setindex(xdims, d, 1)
    buffer = oneAPI.oneArray{T}(undef, xdims) # copy for safety
    onemklDftSetValueConfigValue(desc, ONEMKL_DFT_PARAM_PLACEMENT, ONEMKL_DFT_VALUE_NOT_INPLACE)
    # For now, disable batching for real inverse FFTs due to oneMKL parameter conflicts
    # Use loop-based approach instead for multi-dimensional arrays
    if N > 1
        @info "Batched real inverse FFTs not yet supported by oneMKL - please use loop-based approach or 1D arrays"
    end
    stc = onemklDftCommit(desc, q); stc == 0 || error("commit failed ($stc)")
    R = length(reg)
    rMKLFFTPlan{T,MKLFFT_INVERSE,false,N,R,typeof(buffer)}(desc,q,xdims,ydims,:brfft,reg,buffer,nothing)
end

# Multi-dimensional real inverse FFT using complex FFT approach
struct ComplexBasedRealIFFTPlan{T,N,R} <: MKLFFTPlan{T,MKLFFT_INVERSE,false}
    complex_plan::cMKLFFTPlan{T,MKLFFT_INVERSE,false,N,R,Nothing}
    sz::NTuple{N,Int}
    osz::NTuple{N,Int}
    region::NTuple{R,Int}
    d::Int # Original size of the reduced dimension
end

function _plan_brfft_nd(X::oneAPI.oneArray{T,N}, d::Integer, reg::NTuple{R,Int}) where {T<:Union{ComplexF32,ComplexF64},N,R}
    # Calculate the full complex array size (before real FFT reduction)
    xdims = size(X)
    full_complex_dims = ntuple(N) do i
        if i in reg && i == minimum(reg)
            # First dimension in region was reduced
            d # Restore original size
        else
            xdims[i]
        end
    end
    # Create complex version for planning - use the full size
    X_complex_full = oneAPI.oneArray{T}(undef, full_complex_dims)
    complex_plan = plan_bfft(X_complex_full, reg)
    ComplexBasedRealIFFTPlan{T,N,R}(complex_plan, xdims, full_complex_dims, reg, d)
end

# Show method for complex-based inverse plan
function Base.show(io::IO, p::ComplexBasedRealIFFTPlan{T}) where {T}
    print(io, "oneMKL FFT inverse plan for ")
    if isempty(p.sz); print(io, "0-dimensional")
    else print(io, join(p.sz, "×"))
    end
    print(io, " oneArray of ", T, " (multi-dimensional via complex FFT)")
end

# Execution for complex-based real inverse FFT plan
function Base.:*(p::ComplexBasedRealIFFTPlan{T,N,R}, X::oneAPI.oneArray{T}) where {T,N,R}
    # Reconstruct full complex array by exploiting conjugate symmetry
    # This is a simplified approach - for full accuracy, we'd need to properly
    # reconstruct the conjugate symmetric part
    # For now, pad with zeros (this works for certain cases but isn't fully general)
    xdims = size(X)
    # NOTE(review): `full_indices` is computed but never used below — dead code?
    full_indices = ntuple(N) do i
        if i in p.region && i == minimum(p.region)
            # Extend the reduced dimension
            1:p.d
        else
            1:xdims[i]
        end
    end
    # Create full complex array and copy the available data
    X_full = oneAPI.oneArray{T}(undef, p.osz)
    fill!(X_full, zero(T))
    # Copy the input data to the appropriate slice
    # NOTE: This is a simplified approach that doesn't fully reconstruct
    # conjugate symmetry. For full accuracy, proper conjugate symmetric
    # reconstruction should be implemented.
    copy_indices = ntuple(N) do i
        if i in p.region && i == minimum(p.region)
            1:xdims[i] # Only the available part
        else
            1:xdims[i]
        end
    end
    X_full[copy_indices...] = X
    # Perform complex inverse FFT
    Y_complex = p.complex_plan * X_full
    # Extract real part (this is where the real output comes from)
    return real.(Y_complex)
end

# Inverse plan for complex-based real FFT plans
function plan_inv(p::ComplexBasedRealFFTPlan{T,N,R}) where {T,N,R}
    # For real FFT inverse, we need plan_brfft functionality
    # The first dimension in the region should be the one that was reduced
    first_dim = minimum(p.region)
    d = p.sz[first_dim] # Original size of the reduced dimension
    # Create inverse plan using our new multi-dimensional brfft
    brfft_plan = _plan_brfft_nd(oneAPI.oneArray{Complex{T}}(undef, p.osz), d, p.region)
    ScaledPlan(brfft_plan, 1/normalization_factor(p.sz, p.region))
end

# Inverse plan for complex-based real inverse FFT plans
function plan_inv(p::ComplexBasedRealIFFTPlan{T,N,R}) where {T,N,R}
    # Create forward plan
    forward_plan = _plan_rfft_nd(oneAPI.oneArray{real(T)}(undef, p.osz), p.region)
    ScaledPlan(forward_plan, 1/normalization_factor(p.osz, p.region))
end

# Convenience no-region methods use all dimensions in order
plan_fft(X::oneAPI.oneArray) = plan_fft(X, ntuple(identity, ndims(X)))
plan_bfft(X::oneAPI.oneArray) = plan_bfft(X, ntuple(identity, ndims(X)))
plan_fft!(X::oneAPI.oneArray) = plan_fft!(X, ntuple(identity, ndims(X)))
plan_bfft!(X::oneAPI.oneArray) = plan_bfft!(X, ntuple(identity, ndims(X)))
plan_rfft(X::oneAPI.oneArray) = plan_rfft(X, ntuple(identity, ndims(X))) # default all dims like Base.rfft
plan_brfft(X::oneAPI.oneArray, d::Integer) = plan_brfft(X, d, ntuple(identity, ndims(X)))

# Alias names to mirror AMDGPU / AbstractFFTs style
const plan_ifft = plan_bfft
const plan_ifft! = plan_bfft!
# plan_irfft should be normalized, unlike plan_brfft
plan_irfft(X::oneAPI.oneArray{T,N}, d::Integer, region) where {T,N} = begin
    p = plan_brfft(X, d, region)
    ScaledPlan(p, 1/normalization_factor(p.sz, p.region))
end
plan_irfft(X::oneAPI.oneArray{T,N}, d::Integer) where {T,N} = plan_irfft(X, d, (1,))

# Inversion
Base.inv(p::MKLFFTPlan) = plan_inv(p)

# High-level wrappers operating like CPU FFTW versions.
function fft(X::oneAPI.oneArray{T}) where {T<:Union{ComplexF32,ComplexF64}}
    (plan_fft(X) * X)
end

function ifft(X::oneAPI.oneArray{T}) where {T<:Union{ComplexF32,ComplexF64}}
    p = plan_bfft(X)
    # Apply normalization for ifft (unlike bfft which is unnormalized)
    scaling = one(T) / normalization_factor(size(X), ntuple(identity, ndims(X)))
    scaling * (p * X)
end

function fft!(X::oneAPI.oneArray{T}) where {T<:Union{ComplexF32,ComplexF64}}
    (plan_fft!(X) * X; X)
end

function ifft!(X::oneAPI.oneArray{T}) where {T<:Union{ComplexF32,ComplexF64}}
    p = plan_bfft!(X)
    # Apply normalization for ifft! (unlike bfft! which is unnormalized)
    scaling = one(T) / normalization_factor(size(X), ntuple(identity, ndims(X)))
    p * X
    X .*= scaling
    X
end

function rfft(X::oneAPI.oneArray{T}) where {T<:Union{Float32,Float64}}
    (plan_rfft(X) * X)
end

function irfft(X::oneAPI.oneArray{T}, d::Integer) where {T<:Union{ComplexF32,ComplexF64}}
    # Use the normalized plan_irfft instead of unnormalized plan_brfft
    (plan_irfft(X, d) * X)
end

# Execution helpers

# Raw device pointer for passing a oneArray to the C wrapper.
_rawptr(a::oneAPI.oneArray{T}) where T = reinterpret(Ptr{Cvoid}, pointer(a))

# In-place complex forward execution.
function _exec!(p::cMKLFFTPlan{T,MKLFFT_FORWARD,true}, X::oneAPI.oneArray{T}) where T
    st = onemklDftComputeForward(p.handle, _rawptr(X)); st==0 || error("forward FFT failed ($st)"); X
end

# In-place complex inverse execution.
function _exec!(p::cMKLFFTPlan{T,MKLFFT_INVERSE,true}, X::oneAPI.oneArray{T}) where T
    st = onemklDftComputeBackward(p.handle, _rawptr(X)); st==0 || error("inverse FFT failed ($st)"); X
end

# Out-of-place complex execution, direction chosen from the K type parameter.
function _exec!(p::cMKLFFTPlan{T,K,false}, X::oneAPI.oneArray{T}, Y::oneAPI.oneArray{T}) where {T,K}
    st = (K==MKLFFT_FORWARD ? onemklDftComputeForwardOutOfPlace : onemklDftComputeBackwardOutOfPlace)(p.handle, _rawptr(X), _rawptr(Y))
    st==0 || error("FFT failed ($st)"); Y
end

# Real forward
function _exec!(p::rMKLFFTPlan{T,MKLFFT_FORWARD,false}, X::oneAPI.oneArray{T}, Y::oneAPI.oneArray{Complex{T}}) where T
    st = onemklDftComputeForwardOutOfPlace(p.handle, _rawptr(X), _rawptr(Y)); st==0 || error("rfft failed ($st)"); Y
end

# Real inverse (complex -> real)
function _exec!(p::rMKLFFTPlan{T,MKLFFT_INVERSE,false}, X::oneAPI.oneArray{T}, Y::oneAPI.oneArray{R}) where {R,T<:Complex{R}}
    st = onemklDftComputeBackwardOutOfPlace(p.handle, _rawptr(X), _rawptr(Y)); st==0 || error("brfft failed ($st)"); Y
end

# Public API similar to AMDGPU
function Base.:*(p::cMKLFFTPlan{T,K,true}, X::oneAPI.oneArray{T}) where {T,K}
    _exec!(p,X)
end

function Base.:*(p::cMKLFFTPlan{T,K,false}, X::oneAPI.oneArray{T}) where {T,K}
    Y = oneAPI.oneArray{T}(undef, p.osz); _exec!(p,X,Y)
end

function LinearAlgebra.mul!(Y::oneAPI.oneArray{T}, p::cMKLFFTPlan{T,K,false}, X::oneAPI.oneArray{T}) where {T,K}
    _exec!(p,X,Y)
end

# Real forward
function Base.:*(p::rMKLFFTPlan{T,MKLFFT_FORWARD,false}, X::oneAPI.oneArray{T}) where {T<:Union{Float32,Float64}}
    Y = oneAPI.oneArray{Complex{T}}(undef, p.osz); _exec!(p,X,Y)
end

function LinearAlgebra.mul!(Y::oneAPI.oneArray{Complex{T}}, p::rMKLFFTPlan{T,MKLFFT_FORWARD,false}, X::oneAPI.oneArray{T}) where {T<:Union{Float32,Float64}}
    _exec!(p,X,Y)
end

# Real inverse
function Base.:*(p::rMKLFFTPlan{T,MKLFFT_INVERSE,false}, X::oneAPI.oneArray{T}) where {R,T<:Complex{R}}
    Y = oneAPI.oneArray{R}(undef, p.osz); _exec!(p,X,Y)
end

function LinearAlgebra.mul!(Y::oneAPI.oneArray{R}, p::rMKLFFTPlan{T,MKLFFT_INVERSE,false}, X::oneAPI.oneArray{T}) where {R,T<:Complex{R}}
    _exec!(p,X,Y)
end

# Support for applying complex plans to real arrays (convert real to complex first)
function Base.:*(p::cMKLFFTPlan{T,K,false}, X::oneAPI.oneArray{R}) where {T,K,R<:Union{Float32,Float64}}
    # Only allow if T is the complex version of R
    if T != Complex{R}
        error("Type mismatch: plan expects $(T) but got $(R)")
    end
    # Convert real input to complex
    X_complex = complex.(X)
    p * X_complex
end

function LinearAlgebra.mul!(Y::oneAPI.oneArray{T}, p::cMKLFFTPlan{T,K,false}, X::oneAPI.oneArray{R}) where {T,K,R<:Union{Float32,Float64}}
    # Only allow if T is the complex version of R
    if T != Complex{R}
        error("Type mismatch: plan expects $(T) but got $(R)")
    end
    # Convert real input to complex
    X_complex = complex.(X)
    _exec!(p, X_complex, Y)
end

end # module FFT

================================================
FILE: lib/mkl/interfaces.jl
================================================

# interfacing with other packages

using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, MulAddMul

# legacy methods with final MulAddMul argument
LinearAlgebra.generic_matvecmul!(C::oneVector{T}, tA::AbstractChar, A::oneSparseMatrixCSR{T}, B::oneVector{T}, _add::MulAddMul) where {T <: Union{Float16, ComplexF16, BlasFloat}} =
    LinearAlgebra.generic_matvecmul!(C, tA, A, B, _add.alpha, _add.beta)
LinearAlgebra.generic_matvecmul!(C::oneVector{T}, tA::AbstractChar, A::oneSparseMatrixCSC{T}, B::oneVector{T}, _add::MulAddMul) where {T <: Union{Float16, ComplexF16, BlasFloat}} =
    LinearAlgebra.generic_matvecmul!(C, tA, A, B, _add.alpha, _add.beta)
LinearAlgebra.generic_matmatmul!(C::oneMatrix{T}, tA, tB, A::oneSparseMatrixCSR{T}, B::oneMatrix{T}, _add::MulAddMul) where {T <: Union{Float16, ComplexF16, BlasFloat}} =
    LinearAlgebra.generic_matmatmul!(C, tA, tB, A, B, _add.alpha, _add.beta)
LinearAlgebra.generic_matmatmul!(C::oneMatrix{T}, tA, tB, A::oneSparseMatrixCSC{T}, B::oneMatrix{T}, _add::MulAddMul) where {T <: Union{Float16, ComplexF16, BlasFloat}} =
    LinearAlgebra.generic_matmatmul!(C, tA, tB, A, B, _add.alpha, _add.beta)

# Sparse CSR matrix-vector product; symmetric/Hermitian flags fall back to 'N'.
function LinearAlgebra.generic_matvecmul!(C::oneVector{T}, tA::AbstractChar, A::oneSparseMatrixCSR{T}, B::oneVector{T}, alpha::Number, beta::Number) where {T <: BlasFloat}
    tA = tA in ('S', 's', 'H', 'h') ?
'N' : tA
    return sparse_gemv!(tA, alpha, A, B, beta, C)
end

# Sparse CSC matrix-vector product: CSC is handled by flipping the transpose
# flag (a CSC matrix is the transpose of a CSR one with swapped index arrays).
function LinearAlgebra.generic_matvecmul!(C::oneVector{T}, tA::AbstractChar, A::oneSparseMatrixCSC{T}, B::oneVector{T}, alpha::Number, beta::Number) where {T <: BlasReal}
    tA = tA in ('S', 's', 'H', 'h') ? 'T' : flip_trans(tA)
    return sparse_gemv!(tA, alpha, A, B, beta, C)
end

# Sparse CSR matrix-matrix product.
function LinearAlgebra.generic_matmatmul!(C::oneMatrix{T}, tA, tB, A::oneSparseMatrixCSR{T}, B::oneMatrix{T}, alpha::Number, beta::Number) where {T <: BlasFloat}
    tA = tA in ('S', 's', 'H', 'h') ? 'N' : tA
    tB = tB in ('S', 's', 'H', 'h') ? 'N' : tB
    return sparse_gemm!(tA, tB, alpha, A, B, beta, C)
end

# Sparse CSC matrix-matrix product (transpose flag flipped, as above).
function LinearAlgebra.generic_matmatmul!(C::oneMatrix{T}, tA, tB, A::oneSparseMatrixCSC{T}, B::oneMatrix{T}, alpha::Number, beta::Number) where {T <: BlasReal}
    tA = tA in ('S', 's', 'H', 'h') ? 'T' : flip_trans(tA)
    tB = tB in ('S', 's', 'H', 'h') ? 'N' : tB
    return sparse_gemm!(tA, tB, alpha, A, B, beta, C)
end

# Sparse triangular solve with a vector right-hand side.
function LinearAlgebra.generic_trimatdiv!(C::oneVector{T}, uploc, isunitc, tfun::Function, A::oneSparseMatrixCSR{T}, B::oneVector{T}) where {T <: BlasFloat}
    return sparse_trsv!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), A, B, C)
end

# Sparse triangular solve with a matrix right-hand side.
function LinearAlgebra.generic_trimatdiv!(C::oneMatrix{T}, uploc, isunitc, tfun::Function, A::oneSparseMatrixCSR{T}, B::oneMatrix{T}) where {T <: BlasFloat}
    return sparse_trsm!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', 'N', isunitc, one(T), A, B, C)
end

================================================
FILE: lib/mkl/linalg.jl
================================================

# interfacing with LinearAlgebra standard library

import LinearAlgebra
using LinearAlgebra: Transpose, Adjoint,
                     Hermitian, Symmetric,
                     LowerTriangular, UnitLowerTriangular,
                     UpperTriangular, UnitUpperTriangular,
                     MulAddMul, wrap

#
# BLAS 1
#

LinearAlgebra.rmul!(x::oneStridedVecOrMat{T}, k::Number) where T<:Union{onemklHalf,onemklFloat} =
    scal!(length(x), T(k), x)

# Work around ambiguity with GPUArrays wrapper
LinearAlgebra.rmul!(x::oneStridedVecOrMat{<:onemklFloat}, k::Real) =
    invoke(LinearAlgebra.rmul!, Tuple{typeof(x), Number}, x, k)

LinearAlgebra.norm(x::oneStridedVecOrMat{<:Union{Float16,ComplexF16,onemklFloat}}) = nrm2(length(x), x)

# Real dot product via oneMKL dot.
function LinearAlgebra.dot(x::oneStridedVector{T}, y::oneStridedVector{T}) where T<:Union{Float16, Float32, Float64}
    n = length(x)
    n == length(y) || throw(DimensionMismatch("dot product arguments have lengths $(length(x)) and $(length(y))"))
    dot(n, x, y)
end

# Complex dot product (conjugating the first argument) via oneMKL dotc.
function LinearAlgebra.dot(x::oneStridedVector{T}, y::oneStridedVector{T}) where T<:Union{ComplexF16,ComplexF32, ComplexF64}
    n = length(x)
    n == length(y) || throw(DimensionMismatch("dot product arguments have lengths $(length(x)) and $(length(y))"))
    dotc(n, x, y)
end

# transpose(x) * y for complex vectors: unconjugated product via dotu.
function LinearAlgebra.:(*)(transx::Transpose{<:Any,<:oneStridedVector{T}}, y::oneStridedVector{T}) where T <:Union{ComplexF16, ComplexF32, ComplexF64}
    x = transx.parent
    n = length(x)
    n == length(y) || throw(DimensionMismatch("dot product arguments have lengths $(length(x)) and $(length(y))"))
    oneMKL.dotu(n, x, y)
end

LinearAlgebra.BLAS.asum(x::oneStridedVecOrMat{<:onemklFloat}) = asum(length(x), x)

# y .+= alpha .* x via oneMKL axpy.
function LinearAlgebra.axpy!(alpha::Number, x::oneStridedVecOrMat{T}, y::oneStridedVecOrMat{T}) where T<:Union{onemklHalf,onemklFloat}
    length(x)==length(y) || throw(DimensionMismatch("axpy arguments have lengths $(length(x)) and $(length(y))"))
    axpy!(length(x), alpha, x, y)
end

# y .= alpha .* x .+ beta .* y via oneMKL axpby.
function LinearAlgebra.axpby!(alpha::Number, x::oneStridedVecOrMat{T}, beta::Number, y::oneStridedVecOrMat{T}) where T<:onemklFloat
    length(x)==length(y) || throw(DimensionMismatch("axpby arguments have lengths $(length(x)) and $(length(y))"))
    axpby!(length(x), alpha, x, beta, y)
end

# Apply a Givens rotation to the pair (x, y).
function LinearAlgebra.rotate!(x::oneStridedVecOrMat{T}, y::oneStridedVecOrMat{T}, c::Number, s::Number) where T<:onemklFloat
    nx = length(x)
    ny = length(y)
    nx==ny || throw(DimensionMismatch("rotate arguments have lengths $nx and $ny"))
    rot!(nx, x, y, c, s)
end

# Reflection: a rotation followed by negating y.
function LinearAlgebra.reflect!(x::oneStridedVecOrMat{T}, y::oneStridedVecOrMat{T}, c::Number, s::Number) where T<:onemklFloat
    nx = length(x)
    ny = length(y)
    nx==ny || throw(DimensionMismatch("reflect arguments have lengths $nx and $ny"))
    rot!(nx, x, y, c, s)
    scal!(ny, -one(real(T)), y)
    x, y
end

#
# BLAS 2

LinearAlgebra.generic_matvecmul!(Y::oneVector, tA::AbstractChar, A::oneStridedMatrix, B::oneStridedVector, _add::MulAddMul) =
    LinearAlgebra.generic_matvecmul!(Y, tA, A, B, _add.alpha, _add.beta)

# Dense matrix-vector product. Dispatches to gemv!/symv!/hemv! when the types
# and flags allow, otherwise falls back to generic_matmatmul!.
function LinearAlgebra.generic_matvecmul!(Y::oneVector, tA::AbstractChar, A::oneStridedMatrix, B::oneStridedVector, a::Number, b::Number)
    mA, nA = tA == 'N' ? size(A) : reverse(size(A))

    if nA != length(B)
        throw(DimensionMismatch("second dimension of A, $nA, does not match length of B, $(length(B))"))
    end
    if mA != length(Y)
        throw(DimensionMismatch("first dimension of A, $mA, does not match length of Y, $(length(Y))"))
    end

    # Degenerate sizes: nothing to compute (but Y may need zeroing).
    if mA == 0
        return Y
    end
    if nA == 0
        return rmul!(Y, 0)
    end

    T = eltype(Y)
    alpha, beta = promote(a, b, zero(T))
    if alpha isa Union{Bool,T} && beta isa Union{Bool,T}
        if T <: onemklFloat && eltype(A) == eltype(B) == T
            if tA in ('N', 'T', 'C')
                return gemv!(tA, alpha, A, B, beta, Y)
            elseif tA in ('S', 's')
                # 'S'/'s' encode symmetric with upper/lower storage
                return symv!(tA == 'S' ? 'U' : 'L', alpha, A, B, beta, Y)
            elseif tA in ('H', 'h')
                # 'H'/'h' encode Hermitian with upper/lower storage
                return hemv!(tA == 'H' ? 'U' : 'L', alpha, A, B, beta, Y)
            end
        end
    end
    return LinearAlgebra.generic_matmatmul!(Y, tA, 'N', A, B, alpha, beta)
end

# triangular
## multiplication
LinearAlgebra.generic_trimatmul!(c::oneStridedVector{T}, uploc, isunitc, tfun::Function, A::oneStridedMatrix{T}, b::oneStridedVector{T}) where {T<:onemklFloat} =
    trmv!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, A, c === b ? c : copyto!(c, b))
## division
LinearAlgebra.generic_trimatdiv!(C::oneStridedVector{T}, uploc, isunitc, tfun::Function, A::oneStridedMatrix{T}, B::oneStridedVector{T}) where {T<:onemklFloat} =
    trsv!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, A, C === B ? C : copyto!(C, B))

#
# BLAS 3
#

LinearAlgebra.generic_matmatmul!(
        C::oneStridedVecOrMat, tA, tB,
        A::oneStridedVecOrMat, B::oneStridedVecOrMat, _add::MulAddMul,
    ) =
    LinearAlgebra.generic_matmatmul!(C, tA, tB, A, B, _add.alpha, _add.beta)

# Dense matrix-matrix product. Dispatches to gemm!/symm!/hemm! when the types
# and flags allow, otherwise falls back to the GPUArrays generic kernel.
function LinearAlgebra.generic_matmatmul!(
        C::oneStridedVecOrMat, tA, tB,
        A::oneStridedVecOrMat, B::oneStridedVecOrMat,
        alpha::Number, beta::Number,
    )
    T = eltype(C)
    mA, nA = size(A, tA == 'N' ? 1 : 2), size(A, tA == 'N' ? 2 : 1)
    mB, nB = size(B, tB == 'N' ? 1 : 2), size(B, tB == 'N' ? 2 : 1)

    nA != mB && throw(
        DimensionMismatch(
            "A has dimensions ($mA,$nA) but B has dimensions ($mB,$nB)"
        )
    )
    (C === A || B === C) && throw(
        ArgumentError(
            "output matrix must not be aliased with input matrix"
        )
    )

    # Degenerate sizes: validate C's shape, then just scale it to zero.
    if mA == 0 || nA == 0 || nB == 0
        size(C) != (mA, nB) && throw(
            DimensionMismatch(
                "C has dimensions $(size(C)), should have ($mA,$nB)"
            )
        )
        return LinearAlgebra.rmul!(C, 0)
    end

    T = eltype(C)
    if alpha isa Union{Bool,T} && beta isa Union{Bool,T}
        # TODO: should the gemm part above be included in this branch?
        α, β = T(alpha), T(beta)
        if (
            all(in(('N', 'T', 'C')), (tA, tB)) &&
            T <: Union{onemklFloat, onemklComplex, onemklHalf} &&
            A isa oneStridedArray{T} && B isa oneStridedArray{T}
        )
            return gemm!(tA, tB, α, A, B, β, C)
        elseif (tA == 'S' || tA == 's') && tB == 'N'
            return symm!('L', tA == 'S' ? 'U' : 'L', α, A, B, β, C)
        elseif (tB == 'S' || tB == 's') && tA == 'N'
            return symm!('R', tB == 'S' ? 'U' : 'L', α, B, A, β, C)
        elseif (tA == 'H' || tA == 'h') && tB == 'N'
            return hemm!('L', tA == 'H' ? 'U' : 'L', α, A, B, β, C)
        elseif (tB == 'H' || tB == 'h') && tA == 'N'
            return hemm!('R', tB == 'H' ? 'U' : 'L', α, B, A, β, C)
        end
    end
    GPUArrays.generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), alpha, beta)
end

# triangular
LinearAlgebra.generic_trimatmul!(C::oneStridedMatrix{T}, uploc, isunitc, tfun::Function, A::oneStridedMatrix{T}, B::oneStridedMatrix{T}) where {T<:onemklFloat} =
    trmm!('L', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), A, C === B ? C : copyto!(C, B))
LinearAlgebra.generic_mattrimul!(C::oneStridedMatrix{T}, uploc, isunitc, tfun::Function, A::oneStridedMatrix{T}, B::oneStridedMatrix{T}) where {T<:onemklFloat} =
    trmm!('R', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), B, C === A ? C : copyto!(C, A))
LinearAlgebra.generic_trimatdiv!(C::oneStridedMatrix{T}, uploc, isunitc, tfun::Function, A::oneStridedMatrix{T}, B::oneStridedMatrix{T}) where {T<:onemklFloat} =
    trsm!('L', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), A, C === B ? C : copyto!(C, B))
LinearAlgebra.generic_mattridiv!(C::oneStridedMatrix{T}, uploc, isunitc, tfun::Function, A::oneStridedMatrix{T}, B::oneStridedMatrix{T}) where {T<:onemklFloat} =
    trsm!('R', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), B, C === A ? C : copyto!(C, A))

#
# BLAS extensions
#

# Extend LinearAlgebra.BLAS.herk!
to dispatch to oneAPI implementation for (elty) in ([Float32, ComplexF32], [Float64, ComplexF64]) @eval begin LinearAlgebra.BLAS.herk!(uplo::Char, trans::Char, alpha::$elty[1], A::oneStridedVecOrMat{$elty[2]}, beta::$elty[1], C::oneStridedMatrix{$elty[2]}) = herk!(uplo, trans, alpha, A, beta, C) end end # Extend LinearAlgebra.BLAS.syrk! to dispatch to oneAPI implementation for (elty) in (Float32, Float64, ComplexF32, ComplexF64) @eval begin LinearAlgebra.BLAS.syrk!(uplo::Char, trans::Char, alpha::$elty, A::oneStridedVecOrMat{$elty}, beta::$elty, C::oneStridedMatrix{$elty}) = syrk!(uplo, trans, alpha, A, beta, C) end end ================================================ FILE: lib/mkl/oneMKL.jl ================================================ module oneMKL using ..oneAPI using ..oneAPI: unsafe_free! using ..oneL0 using ..Support using ..SYCL using ..SYCL: syclQueue_t using GPUArrays using LinearAlgebra using LinearAlgebra: checksquare using LinearAlgebra.LAPACK: chkargsok, chklapackerror, chktrans, chkside, chkdiag, chkuplo using SparseArrays # Exclude Float16 for now, since many oneMKL functions do not take Float16 const onemklFloat = Union{Float64,Float32,ComplexF64,ComplexF32} const onemklComplex = Union{ComplexF32,ComplexF64} const onemklHalf = Float16 include("array.jl") include("utils.jl") include("wrappers_blas.jl") include("wrappers_lapack.jl") include("wrappers_sparse.jl") include("linalg.jl") include("interfaces.jl") include("fft.jl") # Register deferred sparse handle flush as a memory reclaim callback so that OOM # recovery (retry_reclaim) and proactive GC (_maybe_gc) can free MKL internal buffers # associated with sparse matrix handles that were deferred from finalizer threads. 
# Pack the kl sub- and ku super-diagonals of a general matrix into LAPACK-style
# band storage: row `ku + 1 + i - j` of the result holds element (i, j) of A.
function band(A::StridedArray, kl, ku)
    nrows, ncols = size(A)
    banded = zeros(eltype(A), kl + ku + 1, ncols)
    for col in 1:ncols, row in max(1, col - ku):min(nrows, col + kl)
        banded[ku + 1 - col + row, col] = A[row, col]
    end
    return banded
end

# convert band storage to general matrix
# Expand band storage back into an m-by-n dense matrix (inverse of `band`).
function unband(AB::StridedArray, m, kl, ku)
    ncols = size(AB, 2)
    full = zeros(eltype(AB), m, ncols)
    for col in 1:ncols, row in max(1, col - ku):min(m, col + kl)
        full[row, col] = AB[ku + 1 - col + row, col]
    end
    return full
end

# zero out elements not on matrix bands
# Implemented as a round-trip through band storage, which drops everything
# outside the kl/ku band.
function bandex(A::AbstractMatrix, kl, ku)
    m = size(A, 1)
    return unband(band(A, kl, ku), m, kl, ku)
end
function Base.convert(::Type{onemklLayout}, layout::Char)
    # 'R' => row major, 'C' => column major
    if layout == 'R'
        return ONEMKL_LAYOUT_ROW
    elseif layout == 'C'
        return ONEMKL_LAYOUT_COL
    else
        # BUGFIX: the message used to interpolate `$layout` while the argument
        # was named `index`, so the error path raised UndefVarError instead of
        # the intended ArgumentError. The parameter is renamed to match.
        throw(ArgumentError("Unknown layout $layout"))
    end
end

function Base.convert(::Type{onemklJobsvd}, job::Char)
    # SVD job codes: 'N' none, 'A' all columns, 'O' overwrite, 'S' leading columns
    if job == 'N'
        return ONEMKL_JOBSVD_N
    elseif job == 'A'
        return ONEMKL_JOBSVD_A
    elseif job == 'O'
        return ONEMKL_JOBSVD_O
    elseif job == 'S'
        return ONEMKL_JOBSVD_S
    else
        throw(ArgumentError("Unknown job $job."))
    end
end

function Base.convert(::Type{onemklJob}, job::Char)
    if job == 'N'
        return ONEMKL_JOB_N
    elseif job == 'V'
        return ONEMKL_JOB_V
    elseif job == 'U'
        return ONEMKL_JOB_U
    elseif job == 'A'
        return ONEMKL_JOB_A
    elseif job == 'S'
        return ONEMKL_JOB_S
    elseif job == 'O'
        return ONEMKL_JOB_O
    else
        throw(ArgumentError("Unknown job $job."))
    end
end

function Base.convert(::Type{onemklRangev}, range::Char)
    # eigenvalue range selectors: 'A' all, 'V' value interval, 'I' index interval
    if range == 'A'
        return ONEMKL_RANGEV_A
    elseif range == 'V'
        return ONEMKL_RANGEV_V
    elseif range == 'I'
        return ONEMKL_RANGEV_I
    else
        throw(ArgumentError("Unknown eigenvalue solver range $range."))
    end
end

# create a batch of pointers in device memory from a batch of device arrays
@inline function unsafe_batch(batch::Vector{<:oneArray{T}}) where {T}
    ptrs = pointer.(batch)
    return oneArray(ptrs)
end

# flip a transpose/uplo character to its opposite (used by the sparse wrappers)
flip_trans(trans::Char) = trans == 'N' ? 'T' : 'N'
flip_uplo(uplo::Char) = uplo == 'L' ? 'U' : 'L'
## (GE) general matrix-matrix multiplication batched
#
# Batched GEMM over Julia vectors of device matrices. Every batch entry is
# checked against the shapes implied by op(A[i])*op(B[i]) = C[i]; only entry 1's
# dimensions and strides are forwarded to oneMKL, so all entries are expected to
# share them — TODO confirm oneMKL's group API requires this within a group.
for (fname, elty) in ((:onemklHgemm_batch, :Float16),
                      (:onemklSgemm_batch, :Float32),
                      (:onemklDgemm_batch, :Float64),
                      (:onemklCgemm_batch, :ComplexF32),
                      (:onemklZgemm_batch, :ComplexF64))
    @eval begin
        # C[i] = alpha * op(A[i]) * op(B[i]) + beta * C[i] for each batch entry;
        # returns C. Throws DimensionMismatch on batch-length or shape mismatch.
        function gemm_batched!(transA::Char, transB::Char, alpha::Number,
                               A::Vector{<:oneStridedMatrix{$elty}},
                               B::Vector{<:oneStridedMatrix{$elty}},
                               beta::Number,
                               C::Vector{<:oneStridedMatrix{$elty}})
            if length(A) != length(B) || length(A) != length(C)
                throw(DimensionMismatch(""))
            end
            # validate every entry of the batch against the requested op()s
            for (As,Bs,Cs) in zip(A,B,C)
                m = size(As, transA == 'N' ? 1 : 2)
                k = size(As, transA == 'N' ? 2 : 1)
                n = size(Bs, transB == 'N' ? 2 : 1)
                if m != size(Cs,1) || n != size(Cs,2) || k != size(Bs, transB == 'N' ? 1 : 2)
                    throw(DimensionMismatch(""))
                end
            end
            m = size(A[1], transA == 'N' ? 1 : 2)
            k = size(A[1], transA == 'N' ? 2 : 1)
            n = size(B[1], transB == 'N' ? 2 : 1)
            lda = max(1,stride(A[1],2))
            ldb = max(1,stride(B[1],2))
            ldc = max(1,stride(C[1],2))
            # device-resident arrays of pointers to the individual matrices
            Aptrs = unsafe_batch(A)
            Bptrs = unsafe_batch(B)
            Cptrs = unsafe_batch(C)
            bsize = length(A)
            # the group API takes per-group parameter arrays; one group per entry
            m_dev = oneVector{Int}(fill(m,bsize))
            n_dev = oneVector{Int}(fill(n,bsize))
            k_dev = oneVector{Int}(fill(k,bsize))
            lda_dev = oneVector{Int}(fill(lda,bsize))
            ldb_dev = oneVector{Int}(fill(ldb,bsize))
            ldc_dev = oneVector{Int}(fill(ldc,bsize))
            alpha_dev = oneVector{$elty}(fill(alpha,bsize))
            beta_dev = oneVector{$elty}(fill(beta,bsize))
            groupsize_dev = oneVector{Int}(fill(1,bsize))
            queue = global_queue(context(A[1]), device(A[1]))
            $fname(sycl_queue(queue), transA, transB, m_dev, n_dev, k_dev, alpha_dev,
                   Aptrs, lda_dev, Bptrs, ldb_dev, beta_dev, Cptrs, ldc_dev, length(A),
                   groupsize_dev)
            # eagerly release all temporary device buffers
            unsafe_free!(Cptrs)
            unsafe_free!(Bptrs)
            unsafe_free!(Aptrs)
            unsafe_free!(m_dev)
            unsafe_free!(n_dev)
            unsafe_free!(k_dev)
            unsafe_free!(lda_dev)
            unsafe_free!(ldb_dev)
            unsafe_free!(ldc_dev)
            unsafe_free!(alpha_dev)
            unsafe_free!(beta_dev)
            unsafe_free!(groupsize_dev)
            C
        end
    end
end

# Out-of-place batched GEMM: allocates one output matrix per entry, sized from
# entry 1 of A and B, then forwards to gemm_batched! with beta = 0.
function gemm_batched(transA::Char, transB::Char, alpha::Number,
                      A::Vector{<:oneStridedMatrix{T}},
                      B::Vector{<:oneStridedMatrix{T}}) where T
    C = oneMatrix{T}[similar(B[1], (size(A[1], transA == 'N' ? 1 : 2),
                                    size(B[1], transB == 'N' ? 2 : 1))) for i in 1:length(A)]
    gemm_batched!(transA, transB, alpha, A, B, zero(T), C )
end

# Convenience overload with alpha = 1.
function gemm_batched(transA::Char, transB::Char,
                      A::Vector{<:oneStridedMatrix{T}},
                      B::Vector{<:oneStridedMatrix{T}}) where T
    gemm_batched(transA, transB, one(T), A, B)
end

## (TR) triangular triangular matrix solution batched
for (fname, elty) in ((:onemklDtrsm_batch, :Float64),
                      (:onemklStrsm_batch, :Float32),
                      (:onemklCtrsm_batch, :ComplexF32),
                      (:onemklZtrsm_batch, :ComplexF64))
    @eval begin
        # In-place batched triangular solve: overwrites each B[i] with the
        # solution of op(A[i]) X = alpha B[i] (side == 'L') or
        # X op(A[i]) = alpha B[i] (side == 'R'); returns B.
        function trsm_batched!(side::Char, uplo::Char, transa::Char, diag::Char,
                               alpha::Number,
                               A::Vector{<:oneStridedMatrix{$elty}},
                               B::Vector{<:oneStridedMatrix{$elty}})
            if length(A) != length(B)
                throw(DimensionMismatch(""))
            end
            # each A[i] must be square and conform with B[i] on the chosen side
            for (As,Bs) in zip(A,B)
                mA, nA = size(As)
                m,n = size(Bs)
                if mA != nA throw(DimensionMismatch("A must be square")) end
                if nA != (side == 'L' ? m : n) throw(DimensionMismatch("trsm_batched!")) end
            end
            # only entry 1's sizes/strides are forwarded (see gemm_batched! note)
            m,n = size(B[1])
            lda = max(1,stride(A[1],2))
            ldb = max(1,stride(B[1],2))
            Aptrs = unsafe_batch(A)
            Bptrs = unsafe_batch(B)
            bsize = length(A)
            m_dev = oneVector{Int}(fill(m,bsize))
            n_dev = oneVector{Int}(fill(n,bsize))
            lda_dev = oneVector{Int}(fill(lda,bsize))
            ldb_dev = oneVector{Int}(fill(ldb,bsize))
            alpha_dev = oneVector{$elty}(fill(alpha,bsize))
            groupsize_dev = oneVector{Int}(fill(1,bsize))
            queue = global_queue(context(A[1]), device(A[1]))
            $fname(sycl_queue(queue), side, uplo, transa, diag, m_dev, n_dev, alpha_dev,
                   Aptrs, lda_dev, Bptrs, ldb_dev, length(A), groupsize_dev)
            unsafe_free!(Bptrs)
            unsafe_free!(Aptrs)
            unsafe_free!(m_dev)
            unsafe_free!(n_dev)
            unsafe_free!(lda_dev)
            unsafe_free!(ldb_dev)
            unsafe_free!(alpha_dev)
            unsafe_free!(groupsize_dev)
            B
        end
    end
end

# Out-of-place variant: solves into a copy of B, leaving B untouched.
function trsm_batched(side::Char, uplo::Char, transa::Char, diag::Char,
                      alpha::Number,
                      A::Vector{<:oneStridedMatrix{T}},
                      B::Vector{<:oneStridedMatrix{T}}) where T
    trsm_batched!(side, uplo, transa, diag, alpha, A, copy(B) )
end
## (L3: symm) symmetric matrix-matrix and matrix-vector multiplication
for (fname, elty) in ((:onemklSsymm, :Float32),
                      (:onemklDsymm, :Float64),
                      (:onemklCsymm, :ComplexF32),
                      (:onemklZsymm, :ComplexF64))
    @eval begin
        # C = alpha*A*B + beta*C (side == 'L') or alpha*B*A + beta*C
        # (side == 'R'), with A symmetric and only its `uplo` triangle
        # referenced; returns C.
        function symm!(side::Char, uplo::Char, alpha::Number,
                       A::oneStridedVecOrMat{$elty}, B::oneStridedVecOrMat{$elty},
                       beta::Number, C::oneStridedVecOrMat{$elty})
            k, nA = size(A)
            if k != nA throw(DimensionMismatch("Matrix A must be square")) end
            # output dimensions depend on which side A multiplies from
            m = side == 'L' ? k : size(B,1)
            n = side == 'L' ? size(B,2) : k
            if m != size(C,1) || n != size(C,2) || k != size(B, side == 'L' ? 1 : 2)
                throw(DimensionMismatch(""))
            end
            lda = max(1,stride(A,2))
            ldb = max(1,stride(B,2))
            ldc = max(1,stride(C,2))
            queue = global_queue(context(A), device(A))
            $fname(sycl_queue(queue), side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc)
            C
        end
    end
end

# Out-of-place symm: allocates the result (same shape as B) and uses beta = 0.
function symm(side::Char, uplo::Char, alpha::Number, A::oneStridedVecOrMat{T},
              B::oneStridedVecOrMat{T}) where T
    symm!(side, uplo, alpha, A, B, zero(T), similar(B))
end

# Convenience overload with alpha = 1.
function symm(side::Char, uplo::Char, A::oneStridedVecOrMat{T},
              B::oneStridedVecOrMat{T}) where T
    symm(side, uplo, one(T), A, B)
end

## syrk
for (fname, elty) in ((:onemklSsyrk, :Float32),
                      (:onemklDsyrk, :Float64),
                      (:onemklCsyrk, :ComplexF32),
                      (:onemklZsyrk, :ComplexF64))
    @eval begin
        # Symmetric rank-k update: C = alpha*A*A' + beta*C (trans == 'N') or
        # alpha*A'*A + beta*C; only the `uplo` triangle of C is updated.
        function syrk!(uplo::Char, trans::Char, alpha::Number,
                       A::oneStridedVecOrMat{$elty}, beta::Number,
                       C::oneStridedMatrix{$elty})
            mC, n = size(C)
            if mC != n throw(DimensionMismatch("C must be square")) end
            nn = size(A, trans == 'N' ? 1 : 2)
            if nn != n throw(DimensionMismatch("syrk!")) end
            k = size(A, trans == 'N' ? 2 : 1)
            lda = max(1,stride(A,2))
            ldc = max(1,stride(C,2))
            queue = global_queue(context(A), device(A))
            $fname(sycl_queue(queue), uplo, trans, n, k, alpha, A, lda, beta, C, ldc)
            C
        end
    end
end

# Out-of-place syrk: allocates an n-by-n result and uses beta = 0.
function syrk(uplo::Char, trans::Char, alpha::Number,
              A::oneStridedVecOrMat{T}) where T
    n = size(A, trans == 'N' ? 1 : 2)
    syrk!(uplo, trans, alpha, A, zero(T), similar(A, (n, n)))
end
# Convenience overload with alpha = 1.
syrk(uplo::Char, trans::Char, A::oneStridedVecOrMat) =
    syrk(uplo, trans, one(eltype(A)), A)

## syr2k
for (fname, elty) in ((:onemklDsyr2k,:Float64),
                      (:onemklSsyr2k,:Float32),
                      (:onemklZsyr2k,:ComplexF64),
                      (:onemklCsyr2k,:ComplexF32))
    @eval begin
        # Symmetric rank-2k update: C = alpha*(A*B' + B*A') + beta*C
        # (trans == 'N') or the transposed form; only `uplo` triangle updated.
        function syr2k!(uplo::Char, trans::Char, alpha::Number,
                        A::oneStridedVecOrMat{$elty}, B::oneStridedVecOrMat{$elty},
                        beta::Number, C::oneStridedVecOrMat{$elty})
            m, n = size(C)
            if m != n throw(DimensionMismatch("C must be square")) end
            nA = size(A, trans == 'N' ? 1 : 2)
            nB = size(B, trans == 'N' ? 1 : 2)
            if nA != n throw(DimensionMismatch("First dimension of op(A) must match C")) end
            if nB != n throw(DimensionMismatch("First dimension of op(B.') must match C")) end
            k = size(A, trans == 'N' ? 2 : 1)
            if k != size(B, trans == 'N' ? 2 : 1)
                throw(DimensionMismatch("Inner dimensions of op(A) and op(B.') must match"))
            end
            lda = max(1,stride(A,2))
            ldb = max(1,stride(B,2))
            ldc = max(1,stride(C,2))
            queue = global_queue(context(A), device(A))
            $fname(sycl_queue(queue), uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc)
            C
        end
    end
end

# Out-of-place syr2k: allocates an n-by-n result and uses beta = 0.
function syr2k(uplo::Char, trans::Char, alpha::Number,
               A::oneStridedVecOrMat{T}, B::oneStridedVecOrMat{T}) where T
    n = size(A, trans == 'N' ? 1 : 2)
    syr2k!(uplo, trans, convert(T, alpha), A, B, zero(T), similar(A, (n, n)))
end
# Convenience overload with alpha = 1.
syr2k(uplo::Char, trans::Char, A::oneStridedVecOrMat, B::oneStridedVecOrMat) =
    syr2k(uplo, trans, one(eltype(A)), A, B)
2 : 1) lda = max(1,stride(A,2)) ldc = max(1,stride(C,2)) queue = global_queue(context(A), device(A)) $fname(sycl_queue(queue), uplo, trans, n, k, alpha, A, lda, beta, C, ldc) C end end end function herk(uplo::Char, trans::Char, alpha::Real, A::oneStridedVecOrMat{T}) where T n = size(A, trans == 'N' ? 1 : 2) herk!(uplo, trans, alpha, A, zero(real(T)), similar(A, (n,n))) end herk(uplo::Char, trans::Char, A::oneStridedVecOrMat{T}) where T = herk(uplo, trans, one(real(T)), A) ## her2k for (fname, elty) in ((:onemklZher2k,:ComplexF64), (:onemklCher2k,:ComplexF32)) @eval begin function her2k!(uplo::Char, trans::Char, alpha::Number, A::oneStridedVecOrMat{$elty}, B::oneStridedVecOrMat{$elty}, beta::Real, C::oneStridedMatrix{$elty}) m, n = size(C) if m != n throw(DimensionMismatch("C must be square")) end nA = size(A, trans == 'N' ? 1 : 2) nB = size(B, trans == 'N' ? 1 : 2) if nA != n throw(DimensionMismatch("First dimension of op(A) must match C")) end if nB != n throw(DimensionMismatch("First dimension of op(B.') must match C")) end k = size(A, trans == 'N' ? 2 : 1) if k != size(B, trans == 'N' ? 2 : 1) throw(DimensionMismatch("Inner dimensions of op(A) and op(B.') must match")) end lda = max(1,stride(A,2)) ldb = max(1,stride(B,2)) ldc = max(1,stride(C,2)) queue = global_queue(context(A), device(A)) $fname(sycl_queue(queue), uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc) C end end end function her2k(uplo::Char, trans::Char, alpha::Number, A::oneStridedVecOrMat{T}, B::oneStridedVecOrMat{T}) where T n = size(A, trans == 'N' ? 
1 : 2) her2k!(uplo, trans, alpha, A, B, zero(real(T)), similar(A, (n,n))) end her2k(uplo::Char, trans::Char, A::oneStridedVecOrMat{T}, B::oneStridedVecOrMat{T}) where T = her2k(uplo, trans, one(T), A, B) # level 2 ## gemv for (fname, elty) in ((:onemklSgemv, :Float32), (:onemklDgemv, :Float64), (:onemklCgemv, :ComplexF32), (:onemklZgemv, :ComplexF64)) @eval begin function gemv!(trans::Char, alpha::Number, a::oneStridedArray{$elty}, x::oneStridedArray{$elty}, beta::Number, y::oneStridedArray{$elty}) queue = global_queue(context(x), device(x)) # handle trans m,n = size(a) # check dimensions length(x) == (trans == 'N' ? n : m) && length(y) == (trans == 'N' ? m : n) || throw(DimensionMismatch("")) # compute increments lda = max(1,stride(a,2)) incx = stride(x,1) incy = stride(y,1) $fname(sycl_queue(queue), trans, m, n, alpha, a, lda, x, incx, beta, y, incy) y end end end function gemv(trans::Char, alpha::Number, a::oneStridedArray{T}, x::oneStridedArray{T}) where T gemv!(trans, alpha, a, x, zero(T), similar(x, size(a, (trans == 'N' ? 1 : 2)))) end function gemv(trans::Char, a::oneStridedArray{T}, x::oneStridedArray{T}) where T gemv!(trans, one(T), a, x, zero(T), similar(x, size(a, (trans == 'N' ? 
### hemv
for (fname, elty) in ((:onemklChemv,:ComplexF32),
                      (:onemklZhemv,:ComplexF64))
    @eval begin
        # Hermitian matrix-vector product: y = alpha*A*x + beta*y, with A
        # Hermitian and only its `uplo` triangle referenced; returns y.
        function hemv!(uplo::Char, alpha::Number, A::oneStridedVecOrMat{$elty},
                       x::oneStridedVecOrMat{$elty}, beta::Number,
                       y::oneStridedVecOrMat{$elty})
            m, n = size(A)
            if m != n throw(DimensionMismatch("Matrix A is $m by $n but must be square")) end
            if m != length(x) || m != length(y) throw(DimensionMismatch("")) end
            lda = max(1,stride(A,2))
            incx = stride(x,1)
            incy = stride(y,1)
            queue = global_queue(context(x), device(x))
            $fname(sycl_queue(queue), uplo, n, alpha, A, lda, x, incx, beta, y, incy)
            y
        end
    end
end

# Out-of-place hemv (beta = 0, freshly allocated y).
function hemv(uplo::Char, alpha::Number, A::oneStridedVecOrMat{T},
              x::oneStridedVecOrMat{T}) where T
    hemv!(uplo, alpha, A, x, zero(T), similar(x))
end
# Convenience overload with alpha = 1.
function hemv(uplo::Char, A::oneStridedVecOrMat{T},
              x::oneStridedVecOrMat{T}) where T
    hemv(uplo, one(T), A, x)
end

### hbmv, (HB) Hermitian banded matrix-vector multiplication
for (fname, elty) in ((:onemklChbmv,:ComplexF32),
                      (:onemklZhbmv,:ComplexF64))
    @eval begin
        # y = alpha*A*x + beta*y where A is a Hermitian band matrix with k
        # super-diagonals, stored in band format (hence the row-count checks).
        function hbmv!(uplo::Char, k::Integer, alpha::Number,
                      A::oneStridedMatrix{$elty}, x::oneStridedVector{$elty},
                      beta::Number, y::oneStridedVector{$elty})
            m, n = size(A)
            if !(1<=(1+k)<=n) throw(DimensionMismatch("Incorrect number of bands")) end
            if m < 1+k throw(DimensionMismatch("Array A has fewer than 1+k rows")) end
            if n != length(x) || n != length(y) throw(DimensionMismatch("")) end
            lda = max(1,stride(A,2))
            incx = stride(x,1)
            incy = stride(y,1)
            queue = global_queue(context(x), device(x))
            $fname(sycl_queue(queue), uplo, n, k, alpha, A, lda, x, incx, beta, y, incy)
            y
        end
    end
end

# Out-of-place hbmv (beta = 0, result length taken from size(A, 2)).
function hbmv(uplo::Char, k::Integer, alpha::Number, A::oneStridedMatrix{T},
              x::oneStridedVector{T}) where T
    n = size(A,2)
    hbmv!(uplo, k, alpha, A, x, zero(T), similar(x, n))
end
# Convenience overload with alpha = 1.
function hbmv(uplo::Char, k::Integer, A::oneStridedMatrix{T},
              x::oneStridedVector{T}) where T
    hbmv(uplo, k, one(T), A, x)
end

### her
for (fname, elty) in ((:onemklCher,:ComplexF32),
                      (:onemklZher,:ComplexF64))
    @eval begin
        # Hermitian rank-1 update: A += alpha * x * x'; only the `uplo`
        # triangle of A is updated. Returns A.
        function her!(uplo::Char, alpha::Number, x::oneStridedVecOrMat{$elty},
                      A::oneStridedVecOrMat{$elty})
            m, n = size(A)
            m == n || throw(DimensionMismatch("Matrix A is $m by $n but must be square"))
            length(x) == n || throw(DimensionMismatch("Length of vector must be the same as the matrix dimensions"))
            incx = stride(x,1)
            lda = max(1,stride(A,2))
            queue = global_queue(context(x), device(x))
            $fname(sycl_queue(queue), uplo, n, alpha, x, incx, A, lda)
            A
        end
    end
end

### her2
for (fname, elty) in ((:onemklCher2,:ComplexF32),
                      (:onemklZher2,:ComplexF64))
    @eval begin
        # Hermitian rank-2 update: A += alpha*x*y' + conj(alpha)*y*x'; only the
        # `uplo` triangle of A is updated. Returns A.
        function her2!(uplo::Char, alpha::Number, x::oneStridedVecOrMat{$elty},
                       y::oneStridedVecOrMat{$elty}, A::oneStridedVecOrMat{$elty})
            m, n = size(A)
            m == n || throw(DimensionMismatch("Matrix A is $m by $n but must be square"))
            length(x) == n || throw(DimensionMismatch("Length of vector must be the same as the matrix dimensions"))
            length(y) == n || throw(DimensionMismatch("Length of vector must be the same as the matrix dimensions"))
            incx = stride(x,1)
            incy = stride(y,1)
            lda = max(1,stride(A,2))
            queue = global_queue(context(x), device(x))
            $fname(sycl_queue(queue), uplo, n, alpha, x, incx, y, incy, A, lda)
            A
        end
    end
end

# level 1
## axpy
for (fname, elty) in ((:onemklDaxpy,:Float64),
                      (:onemklSaxpy,:Float32),
                      (:onemklHaxpy,:Float16),
                      (:onemklZaxpy,:ComplexF64),
                      (:onemklCaxpy,:ComplexF32))
    @eval begin
        # y = alpha*x + y over the first n strided elements; returns y.
        # NOTE(review): n is forwarded unchecked against length(x)/length(y).
        function axpy!(n::Integer, alpha::Number, x::oneStridedArray{$elty},
                       y::oneStridedArray{$elty})
            queue = global_queue(context(x), device(x))
            alpha = $elty(alpha)
            $fname(sycl_queue(queue), n, alpha, x, stride(x,1), y, stride(y,1))
            y
        end
    end
end
$fname(sycl_queue(queue), n, alpha, x, stride(x,1), beta, y, stride(y,1)) y end end end ## rot for (fname, elty, cty, sty, supty) in ((:onemklSrot,:Float32,:Float32,:Float32,:Number), (:onemklDrot,:Float64,:Float64,:Float64,:Number), (:onemklCrot,:ComplexF32,:Float32,:ComplexF32,:Number), (:onemklZrot,:ComplexF64,:Float64,:ComplexF64,:Number), (:onemklCSrot,:ComplexF32,:Float32,:Float32,:Real), (:onemklZDrot,:ComplexF64,:Float64,:Float64,:Real)) @eval begin function rot!(n::Integer, x::oneStridedArray{$elty}, y::oneStridedArray{$elty}, c::Real, s::$supty) queue = global_queue(context(x), device(x)) c = $cty(c) s = $sty(s) $fname(sycl_queue(queue), n, x, stride(x, 1), y, stride(y, 1), c, s) x, y end end end function axpy!(n::Integer, alpha::Number, x::oneStridedArray{ComplexF16}, y::oneStridedArray{ComplexF16}) wide_x = widen.(x) wide_y = widen.(y) axpy!(n, alpha, wide_x, wide_y) thin_y = convert(typeof(y), wide_y) copyto!(y, thin_y) return y end ## scal for (fname, elty) in ((:onemklDscal,:Float64), (:onemklSscal,:Float32), (:onemklHscal,:Float16), (:onemklZscal,:ComplexF64), (:onemklCscal,:ComplexF32)) @eval begin function scal!(n::Integer, alpha::$elty, x::oneStridedArray{$elty}) queue = global_queue(context(x), device(x)) $fname(sycl_queue(queue), n, alpha, x, stride(x,1)) x end end end function scal!(n::Integer, alpha::Number, x::oneStridedArray{ComplexF16}) wide_x = widen.(x) scal!(n, convert(ComplexF32, alpha), wide_x) thin_x = convert(typeof(x), wide_x) copyto!(x, thin_x) return x end ## nrm2 for (fname, elty, ret_type) in ((:onemklDnrm2, :Float64,:Float64), (:onemklSnrm2, :Float32,:Float32), (:onemklHnrm2, :Float16,:Float16), (:onemklCnrm2, :ComplexF32,:Float32), (:onemklZnrm2, :ComplexF64,:Float64)) @eval begin function nrm2(n::Integer, x::oneStridedArray{$elty}) queue = global_queue(context(x), device(x)) result = oneArray{$ret_type}([0]); $fname(sycl_queue(queue), n, x, stride(x,1), result) res = Array(result) return res[1] end end end 
nrm2(x::oneStridedArray) = nrm2(length(x), x) function nrm2(n::Integer, x::oneStridedArray{ComplexF16}) wide_x = widen.(x) nrm = nrm2(n, wide_x) return convert(Float16, nrm) end ## dot for (jname, fname, elty) in ((:dot, :onemklSdot,:Float32), (:dot, :onemklDdot,:Float64), (:dot, :onemklHdot,:Float16), (:dotc, :onemklCdotc, :ComplexF32), (:dotc, :onemklZdotc, :ComplexF64), (:dotu, :onemklCdotu, :ComplexF32), (:dotu, :onemklZdotu, :ComplexF64)) @eval begin function $jname(n::Integer, x::oneStridedArray{$elty}, y::oneStridedArray{$elty}) queue = global_queue(context(x), device(x)) result = oneArray{$elty}([0]); $fname(sycl_queue(queue), n, x, stride(x,1), y, stride(y,1), result) res = Array(result) return res[1] end end end function dotc(n::Integer, x::oneStridedArray{ComplexF16}, y::oneStridedArray{ComplexF16}) convert(ComplexF16, dotc(n, convert(oneArray{ComplexF32}, x), convert(oneArray{ComplexF32}, y))) end function dotu(n::Integer, x::oneStridedArray{ComplexF16}, y::oneStridedArray{ComplexF16}) convert(ComplexF16, dotu(n, convert(oneArray{ComplexF32}, x), convert(oneArray{ComplexF32}, y))) end # level 2 # sbmv, symmetric banded matrix-vector multiplication for (fname, elty) in ((:onemklSsbmv, :Float32), (:onemklDsbmv, :Float64)) @eval begin function sbmv!(uplo::Char, k::Integer, alpha::Number, a::oneStridedVecOrMat{$elty}, x::oneStridedVecOrMat{$elty}, beta::Number, y::oneStridedVecOrMat{$elty}) m, n = size(a) if !(1<=(1+k)<=n) throw(DimensionMismatch("Incorrect number of bands")) end if m < 1+k throw(DimensionMismatch("Array A has fewer than 1+k rows")) end if n != length(x) || n != length(y) throw(DimensionMismatch("")) end queue = global_queue(context(x), device(x)) lda = max(1, stride(a,2)) incx = stride(x,1) incy = stride(y,1) alpha = $elty(alpha) beta = $elty(beta) $fname(sycl_queue(queue), uplo, n, k, alpha, a, lda, x, incx, beta, y, incy) y end end end function sbmv(uplo::Char, k::Integer, alpha::Number, a::oneStridedArray{T}, x::oneStridedArray{T}) 
for (fname, elty, celty) in ((:onemklCSscal, :Float32, :ComplexF32),
                             (:onemklZDscal, :Float64, :ComplexF64))
    @eval begin
        # Scale a complex vector by a real alpha: x .= alpha .* x over the
        # first n strided elements.
        function scal!(n::Integer, alpha::$elty, x::oneStridedArray{$celty})
            queue = global_queue(context(x), device(x))
            $fname(sycl_queue(queue), n, alpha, x, stride(x,1))
            # BUGFIX: return the scaled vector, consistent with every other
            # scal! method (previously the wrapper's return value leaked out).
            x
        end
    end
end

# level 2
# ger
for (fname, elty) in ((:onemklSger, :Float32),
                      (:onemklDger, :Float64),
                      (:onemklCgerc, :ComplexF32),
                      (:onemklZgerc, :ComplexF64))
    @eval begin
        # Rank-1 update: a += alpha * x * y'. The complex methods map to gerc,
        # so y is conjugated. Returns a.
        function ger!(alpha::Number, x::oneStridedVecOrMat{$elty},
                      y::oneStridedVecOrMat{$elty}, a::oneStridedVecOrMat{$elty})
            m,n = size(a)
            m == length(x) || throw(DimensionMismatch(""))
            n == length(y) || throw(DimensionMismatch(""))
            queue = global_queue(context(x), device(x))
            $fname(sycl_queue(queue), m, n, alpha, x, stride(x,1), y, stride(y,1),
                   a, max(1,stride(a,2)))
            a
        end
    end
end

# spr
for (fname, elty) in ((:onemklSspr, :Float32),
                      (:onemklDspr, :Float64))
    @eval begin
        # Symmetric packed rank-1 update: A += alpha * x * x', with A in packed
        # storage of length n*(n+1)/2; n is recovered from the packed length.
        function spr!(uplo::Char, alpha::Number, x::oneStridedVector{$elty},
                      A::oneStridedVector{$elty})
            n = round(Int, (sqrt(8*length(A))-1)/2)
            length(x) == n || throw(DimensionMismatch("Length of vector must be the same as the matrix dimensions"))
            incx = stride(x,1)
            queue = global_queue(context(x), device(x))
            $fname(sycl_queue(queue), uplo, n, alpha, x, incx, A)
            A
        end
    end
end
max(1,stride(A,2)) incx = stride(x,1) incy = stride(y,1) queue = global_queue(context(x), device(x)) $fname(sycl_queue(queue), uplo, n, alpha, A, lda, x, incx, beta, y, incy) y end end end function symv(uplo::Char, alpha::Number, A::oneStridedVecOrMat{T}, x::oneStridedVecOrMat{T}) where T symv!(uplo, alpha, A, x, zero(T), similar(x)) end function symv(uplo::Char, A::oneStridedVecOrMat{T}, x::oneStridedVecOrMat{T}) where T symv(uplo, one(T), A, x) end # syr for (fname, elty) in ((:onemklSsyr,:Float32), (:onemklDsyr,:Float64)) @eval begin function syr!(uplo::Char, alpha::Number, x::oneStridedVecOrMat{$elty}, A::oneStridedVecOrMat{$elty}) m, n = size(A) m == n || throw(DimensionMismatch("Matrix A is $m by $n but must be square")) length(x) == n || throw(DimensionMismatch("Length of vector must be the same as the matrix dimensions")) incx = stride(x,1) lda = max(1,stride(A,2)) queue = global_queue(context(x), device(x)) $fname(sycl_queue(queue), uplo, n, alpha, x, incx, A, lda) A end end end # # BLAS # # level 1 ## copy for (fname, elty) in ((:onemklDcopy,:Float64), (:onemklScopy,:Float32), (:onemklZcopy,:ComplexF64), (:onemklCcopy,:ComplexF32)) @eval begin function copy!(n::Integer, x::oneStridedArray{$elty}, y::oneStridedArray{$elty}) queue = global_queue(context(x), device(x)) $fname(sycl_queue(queue), n, x, stride(x, 1), y, stride(y, 1)) y end end end function copy!(n::Integer, x::oneStridedArray{T}, y::oneStridedArray{T}) where {T <: Union{Float16, ComplexF16}} copyto!(y,x) end ## asum for (fname, elty, ret_type) in ((:onemklSasum, :Float32, :Float32), (:onemklDasum, :Float64, :Float64), (:onemklCasum, :ComplexF32, :Float32), (:onemklZasum, :ComplexF64, :Float64)) @eval begin function asum(n::Integer, x::oneStridedArray{$elty}) result = oneArray{$ret_type}([0]) queue = global_queue(context(x), device(x)) $fname(sycl_queue(queue), n, x, stride(x, 1), result) res = Array(result) return res[1] end end end ## iamax for (fname, elty) in ((:onemklDiamax_64,:Float64), 
## iamin
for (fname, elty) in ((:onemklDiamin_64,:Float64),
                      (:onemklSiamin_64,:Float32),
                      (:onemklZiamin_64,:ComplexF64),
                      (:onemklCiamin_64,:ComplexF32))
    @eval begin
        # 1-based index of the element with the smallest absolute value.
        # BUGFIX: the signature previously dispatched on the CPU `StridedArray`
        # type; it now takes a device-side array, matching `iamax` above.
        function iamin(x::oneStridedArray{$elty})
            n = length(x)
            result = oneArray{Int64}([0]);
            queue = global_queue(context(x), device(x))
            $fname(sycl_queue(queue),n, x, stride(x, 1), result, 'O')
            return Array(result)[1]
        end
    end
end

## swap
for (fname, elty) in ((:onemklSswap,:Float32),
                      (:onemklDswap,:Float64),
                      (:onemklCswap,:ComplexF32),
                      (:onemklZswap,:ComplexF64))
    @eval begin
        # Exchange the first n strided elements of x and y; returns (x, y).
        function swap!(n::Integer, x::oneStridedArray{$elty},
                       y::oneStridedArray{$elty})
            # Assuming both memory allocated on same device & context
            queue = global_queue(context(x), device(x))
            $fname(sycl_queue(queue), n, x, stride(x, 1), y, stride(y, 1))
            x, y
        end
    end
end

# level 2
# gbmv
for (fname, elty) in ((:onemklSgbmv, :Float32),
                      (:onemklDgbmv, :Float64),
                      (:onemklCgbmv, :ComplexF32),
                      (:onemklZgbmv, :ComplexF64))
    @eval begin
        # General band matrix-vector product: y = alpha*op(a)*x + beta*y, where
        # a is an m-by-n band matrix with kl sub- and ku super-diagonals stored
        # in band format. Returns y.
        function gbmv!(trans::Char, m::Integer, kl::Integer, ku::Integer,
                       alpha::Number, a::oneStridedArray{$elty},
                       x::oneStridedArray{$elty}, beta::Number,
                       y::oneStridedArray{$elty})
            n = size(a,2)
            length(x) == (trans == 'N' ? n : m) && length(y) == (trans == 'N' ? m : n) || throw(DimensionMismatch(""))
            queue = global_queue(context(x), device(x))
            lda = max(1, stride(a,2))
            incx = stride(x,1)
            incy = stride(y,1)
            $fname(sycl_queue(queue), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy)
            y
        end
    end
end
m : n queue = global_queue(context(x), device(x)) gbmv!(trans, m, kl, ku, alpha, a, x, zero(T), similar(x, leny)) end function gbmv(trans::Char, m::Integer, kl::Integer, ku::Integer, a::oneStridedArray{T}, x::oneStridedArray{T}) where T queue = global_queue(context(x), device(x)) gbmv(trans, m, kl, ku, one(T), a, x) end # spmv for (fname, elty) in ((:onemklSspmv, :Float32), (:onemklDspmv, :Float64)) @eval begin function spmv!(uplo::Char, alpha::Number, A::oneStridedVector{$elty}, x::oneStridedVector{$elty}, beta::Number, y::oneStridedVector{$elty}) n = round(Int, (sqrt(8*length(A))-1)/2) if n != length(x) || n != length(y) throw(DimensionMismatch("")) end incx = stride(x,1) incy = stride(y,1) queue = global_queue(context(x), device(x)) $fname(sycl_queue(queue), uplo, n, alpha, A, x, incx, beta, y, incy) y end end end function spmv(uplo::Char, alpha::Number, A::oneStridedVector{T}, x::oneStridedVector{T}) where T spmv!(uplo, alpha, A, x, zero(T), similar(x)) end function spmv(uplo::Char, A::oneStridedVector{T}, x::oneStridedVector{T}) where T spmv(uplo, one(T), A, x) end # tbsv, (TB) triangular banded matrix solve for (fname, elty) in ((:onemklStbsv, :Float32), (:onemklDtbsv, :Float64), (:onemklCtbsv, :ComplexF32), (:onemklZtbsv, :ComplexF64)) @eval begin function tbsv!(uplo::Char, trans::Char, diag::Char, k::Integer, A::oneStridedMatrix{$elty}, x::oneStridedVector{$elty}) m, n = size(A) if !(1<=(1+k)<=n) throw(DimensionMismatch("Incorrect number of bands")) end if m < 1+k throw(DimensionMismatch("Array A has fewer than 1+k rows")) end if n != length(x) throw(DimensionMismatch("")) end lda = max(1,stride(A,2)) incx = stride(x,1) queue = global_queue(context(x), device(x)) $fname(sycl_queue(queue), uplo, trans, diag, n, k, A, lda, x, incx) x end end end function tbsv(uplo::Char, trans::Char, diag::Char, k::Integer, A::oneStridedMatrix{T}, x::oneStridedVector{T}) where T tbsv!(uplo, trans, diag, k, A, copy(x)) end # tbmv ### tbmv, (TB) triangular banded matrix-vector 
### tbmv, (TB) triangular banded matrix-vector multiplication
for (fname, elty) in ((:onemklStbmv,:Float32),
                      (:onemklDtbmv,:Float64),
                      (:onemklCtbmv,:ComplexF32),
                      (:onemklZtbmv,:ComplexF64))
    @eval begin
        # x = op(A)*x in place, where A is a triangular band matrix with k
        # off-diagonals stored in band format. Returns x.
        function tbmv!(uplo::Char, trans::Char, diag::Char, k::Integer,
                       A::oneStridedVecOrMat{$elty}, x::oneStridedVecOrMat{$elty})
            m, n = size(A)
            if !(1<=(1+k)<=n) throw(DimensionMismatch("Incorrect number of bands")) end
            if m < 1+k throw(DimensionMismatch("Array A has fewer than 1+k rows")) end
            if n != length(x) throw(DimensionMismatch("")) end
            lda = max(1,stride(A,2))
            incx = stride(x,1)
            queue = global_queue(context(x), device(x))
            $fname(sycl_queue(queue), uplo, trans, diag, n, k, A, lda, x, incx)
            x
        end
    end
end

# Out-of-place tbmv: multiplies into a copy of x, leaving x untouched.
function tbmv(uplo::Char, trans::Char, diag::Char, k::Integer,
              A::oneStridedVecOrMat{T}, x::oneStridedVecOrMat{T}) where T
    tbmv!(uplo, trans, diag, k, A, copy(x))
end

### trmv, Triangular matrix-vector multiplication
for (fname, elty) in ((:onemklStrmv, :Float32),
                      (:onemklDtrmv, :Float64),
                      (:onemklCtrmv, :ComplexF32),
                      (:onemklZtrmv, :ComplexF64))
    @eval begin
        # x = op(A)*x in place, with A triangular (only its `uplo` triangle
        # referenced; diag == 'U' assumes a unit diagonal). Returns x.
        function trmv!(uplo::Char, trans::Char, diag::Char,
                       A::oneStridedVecOrMat{$elty}, x::oneStridedVecOrMat{$elty})
            m, n = size(A)
            if m != n throw(DimensionMismatch("Matrix A is $m by $n but must be square")) end
            if n != length(x)
                throw(DimensionMismatch("length(x)=$(length(x)) does not match size(A)=$(size(A))"))
            end
            lda = max(1,stride(A,2))
            incx = stride(x,1)
            queue = global_queue(context(x), device(x))
            $fname(sycl_queue(queue), uplo, trans, diag, n, A, lda, x, incx)
            x
        end
    end
end

# Out-of-place trmv: multiplies into a copy of x, leaving x untouched.
function trmv(uplo::Char, trans::Char, diag::Char,
              A::oneStridedVecOrMat{T}, x::oneStridedVecOrMat{T}) where T
    trmv!(uplo, trans, diag, A, copy(x))
end
throw(DimensionMismatch("Matrix A is $m by $n but must be square")) end if n != length(x) throw(DimensionMismatch("length(x)=$(length(x)) does not match size(A)=$(size(A))")) end lda = max(1,stride(A,2)) incx = stride(x,1) queue = global_queue(context(x), device(x)) $fname(sycl_queue(queue), uplo, trans, diag, n, A, lda, x, incx) x end end end function trsv(uplo::Char, trans::Char, diag::Char, A::oneStridedVecOrMat{T}, x::oneStridedVecOrMat{T}) where T trsv!(uplo, trans, diag, A, copy(x)) end # level 3 for (mmname, smname, elty) in ((:onemklDtrmm, :onemklDtrsm, :Float64), (:onemklStrmm, :onemklStrsm, :Float32), (:onemklZtrmm, :onemklZtrsm, :ComplexF64), (:onemklCtrmm, :onemklCtrsm, :ComplexF32)) @eval begin function trmm!(side::Char, uplo::Char, transa::Char, diag::Char, alpha::Number, A::oneStridedMatrix{$elty}, B::oneStridedMatrix{$elty}) m, n = size(B) mA, nA = size(A) if mA != nA throw(DimensionMismatch("A must be square")) end if nA != (side == 'L' ? m : n) throw(DimensionMismatch("trmm!")) end lda = max(1,stride(A,2)) ldb = max(1,stride(B,2)) queue = global_queue(context(A), device(A)) $mmname(sycl_queue(queue), side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb) B end function trsm!(side::Char, uplo::Char, transa::Char, diag::Char, alpha::Number, A::oneStridedMatrix{$elty}, B::oneStridedMatrix{$elty}) m, n = size(B) mA, nA = size(A) if mA != nA throw(DimensionMismatch("A must be square")) end if nA != (side == 'L' ? 
m : n) throw(DimensionMismatch("trsm!")) end lda = max(1,stride(A,2)) ldb = max(1,stride(B,2)) queue = global_queue(context(A), device(A)) $smname(sycl_queue(queue), side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb) B end end end function trmm(side::Char, uplo::Char, transa::Char, diag::Char, alpha::Number, A::oneStridedMatrix{T}, B::oneStridedMatrix{T}) where T trmm!(side, uplo, transa, diag, alpha, A, copy(B)) end function trsm(side::Char, uplo::Char, transa::Char, diag::Char, alpha::Number, A::oneStridedMatrix{T}, B::oneStridedMatrix{T}) where T trsm!(side, uplo, transa, diag, alpha, A, copy(B)) end ## hemm for (fname, elty) in ((:onemklZhemm,:ComplexF64), (:onemklChemm,:ComplexF32)) @eval begin function hemm!(side::Char, uplo::Char, alpha::Number, A::oneStridedMatrix{$elty}, B::oneStridedMatrix{$elty}, beta::Number, C::oneStridedMatrix{$elty}) mA, nA = size(A) m, n = size(B) mC, nC = size(C) if mA != nA throw(DimensionMismatch("A must be square")) end if ((m != mC) || (n != nC)) throw(DimensionMismatch("B and C must have same dimensions")) end if ((side == 'L') && (mA != m)) throw(DimensionMismatch("")) end if ((side == 'R') && (mA != n)) throw(DimensionMismatch("")) end lda = max(1,stride(A,2)) ldb = max(1,stride(B,2)) ldc = max(1,stride(C,2)) queue = global_queue(context(A), device(A)) $fname(sycl_queue(queue), side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc) C end end end function hemm(uplo::Char, trans::Char, alpha::Number, A::oneStridedMatrix{T}, B::oneStridedMatrix{T}) where T m,n = size(B) hemm!( uplo, trans, alpha, A, B, zero(T), similar(B, (m,n) ) ) end hemm(uplo::Char, trans::Char, A::oneStridedMatrix{T}, B::oneStridedMatrix{T}) where T= hemm( uplo, trans, one(T), A, B) for (fname, elty) in ((:onemklDgemm,:Float64), (:onemklSgemm,:Float32), (:onemklHgemm,:Float16), (:onemklZgemm,:ComplexF64), (:onemklCgemm,:ComplexF32)) @eval begin function gemm!(transA::Char, transB::Char, alpha::Number, A::oneStridedVecOrMat{$elty}, 
B::oneStridedVecOrMat{$elty}, beta::Number, C::oneStridedVecOrMat{$elty})
    # Dimensions of op(A) and op(B).
    m = size(A, transA == 'N' ? 1 : 2)
    k = size(A, transA == 'N' ? 2 : 1)
    n = size(B, transB == 'N' ? 2 : 1)
    if m != size(C,1) || n != size(C,2) || k != size(B, transB == 'N' ? 1 : 2)
        throw(DimensionMismatch(""))
    end
    lda = max(1,stride(A,2))
    ldb = max(1,stride(B,2))
    ldc = max(1,stride(C,2))
    device(A) == device(B) == device(C) || error("Multi-device GEMM not supported")
    context(A) == context(B) == context(C) || error("Multi-context GEMM not supported")
    queue = global_queue(context(A), device(A))
    alpha = $elty(alpha)
    beta = $elty(beta)
    $fname(sycl_queue(queue), transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc)
    C
end end end

# Out-of-place GEMM returning a freshly-allocated result matrix.
function gemm(transA::Char, transB::Char, alpha::Number,
              A::oneStridedVecOrMat{T}, B::oneStridedVecOrMat{T}) where T
    gemm!(transA, transB, alpha, A, B, zero(T),
          similar(B, (size(A, transA == 'N' ? 1 : 2),
                      size(B, transB == 'N' ? 2 : 1))))
end

function gemm(transA::Char, transB::Char,
              A::oneStridedVecOrMat{T}, B::oneStridedVecOrMat{T}) where T
    gemm(transA, transB, one(T), A, B)
end

## dgmm: diagonal matrix product, C .= diag(X)*A ('L') or A*diag(X) ('R').
for (fname, elty) in ((:onemklSdgmm, :Float32),
                      (:onemklDdgmm, :Float64),
                      (:onemklCdgmm, :ComplexF32),
                      (:onemklZdgmm, :ComplexF64))
    @eval begin
        function dgmm!(mode::Char, A::oneStridedMatrix{$elty},
                       X::oneStridedVector{$elty}, C::oneStridedMatrix{$elty})
            m, n = size(C)
            mA, nA = size(A)
            lx = length(X)
            if ((mA != m) || (nA != n ))
                throw(DimensionMismatch(""))
            end
            if ((mode == 'L') && (lx != m))
                throw(DimensionMismatch(""))
            end
            if ((mode == 'R') && (lx != n))
                throw(DimensionMismatch(""))
            end
            lda = max(1,stride(A,2))
            incx = stride(X,1)
            ldc = max(1,stride(C,2))
            queue = global_queue(context(A), device(A))
            $fname(sycl_queue(queue), mode, m, n, A, lda, X, incx, C, ldc)
            C
        end
    end
end

function dgmm(mode::Char, A::oneStridedMatrix{T}, X::oneStridedVector{T}) where T
    m, n = size(A)
    dgmm!(mode, A, X, similar(A, (m, n)))
end

# Strided-batched GEMM: slice i computes
# C[:,:,i] .= alpha*op(A[:,:,i])*op(B[:,:,i]) + beta*C[:,:,i];
# a singleton batch dimension in A or B is broadcast via a zero stride.
for (fname, elty) in ((:onemklHgemm_batch_strided, Float16),
                      (:onemklSgemm_batch_strided, Float32),
                      (:onemklDgemm_batch_strided, Float64),
                      (:onemklCgemm_batch_strided, ComplexF32),
                      (:onemklZgemm_batch_strided, ComplexF64))
    @eval begin
        function gemm_strided_batched!(transA::Char, transB::Char, alpha::Number,
                                       A::AbstractArray{$elty, 3}, B::AbstractArray{$elty, 3},
                                       beta::Number, C::AbstractArray{$elty, 3})
            m = size(A, transA == 'N' ? 1 : 2)
            k = size(A, transA == 'N' ? 2 : 1)
            n = size(B, transB == 'N' ? 2 : 1)
            @assert size(A, 3) == size(C, 3) || size(A, 3) == 1 "batch size mismatch: A != C"
            @assert size(B, 3) == size(C, 3) || size(B, 3) == 1 "batch size mismatch: B != C"
            if m != size(C,1) || n != size(C,2) || k != size(B, transB == 'N' ? 1 : 2)
                throw(DimensionMismatch(""))
            end
            lda = max(1,stride(A,2))
            ldb = max(1,stride(B,2))
            ldc = max(1,stride(C,2))
            # Stride 0 broadcasts a singleton batch dimension.
            strideA = size(A, 3) == 1 ? 0 : stride(A, 3)
            strideB = size(B, 3) == 1 ? 0 : stride(B, 3)
            strideC = stride(C, 3)
            batchCount = size(C, 3)
            queue = global_queue(context(A), device(A))
            alpha = $elty(alpha)
            beta = $elty(beta)
            $fname(sycl_queue(queue), transA, transB, m, n, k, alpha, A, lda, strideA,
                   B, ldb, strideB, beta, C, ldc, strideC, batchCount)
            C
        end
    end
end

function gemm_strided_batched(transA::Char, transB::Char, alpha::Number,
                              A::AbstractArray{T, 3}, B::AbstractArray{T, 3}) where T
    C = similar(B, (size(A, transA == 'N' ? 1 : 2),
                    size(B, transB == 'N' ? 2 : 1),
                    max(size(A, 3), size(B, 3))))
    gemm_strided_batched!(transA, transB, alpha, A, B, zero(T), C)
end

function gemm_strided_batched(transA::Char, transB::Char,
                              A::AbstractArray{T, 3}, B::AbstractArray{T,3}) where T
    gemm_strided_batched(transA, transB, one(T), A, B)
end

================================================ FILE: lib/mkl/wrappers_lapack.jl ================================================

# potrf: Cholesky factorization of a symmetric/Hermitian positive-definite matrix,
# overwriting the requested triangle of A.
for (bname, fname, elty) in ((:onemklSpotrf_scratchpad_size, :onemklSpotrf, :Float32),
                             (:onemklDpotrf_scratchpad_size, :onemklDpotrf, :Float64),
                             (:onemklCpotrf_scratchpad_size, :onemklCpotrf, :ComplexF32),
                             (:onemklZpotrf_scratchpad_size, :onemklZpotrf, :ComplexF64))
    @eval begin
        function potrf!(uplo::Char, A::oneStridedMatrix{$elty})
            chkuplo(uplo)
            n = checksquare(A)
            lda = max(1, stride(A, 2))
            queue = global_queue(context(A), device())
            scratchpad_size = $bname(sycl_queue(queue), uplo, n, lda)
            scratchpad = oneVector{$elty}(undef, scratchpad_size)
            $fname(sycl_queue(queue), uplo, n, A, lda, scratchpad, scratchpad_size)
            return A
        end
    end
end

# potrs: solve A*X = B given the Cholesky factor from potrf!, overwriting B.
for (bname, fname, elty) in ((:onemklSpotrs_scratchpad_size, :onemklSpotrs, :Float32),
                             (:onemklDpotrs_scratchpad_size, :onemklDpotrs, :Float64),
                             (:onemklCpotrs_scratchpad_size, :onemklCpotrs, :ComplexF32),
                             (:onemklZpotrs_scratchpad_size, :onemklZpotrs, :ComplexF64))
    @eval begin
        function potrs!(uplo::Char, A::oneStridedMatrix{$elty}, B::oneStridedVecOrMat{$elty})
            chkuplo(uplo)
            n = checksquare(A)
            if size(B, 1) != n
                throw(DimensionMismatch("first dimension of B, $(size(B,1)), must match second dimension of A, $n"))
            end
            nrhs = size(B,2)
            lda = max(1, stride(A, 2))
            ldb = max(1, stride(B, 2))
            queue = global_queue(context(A), device())
            scratchpad_size = $bname(sycl_queue(queue), uplo, n, nrhs, lda, ldb)
            scratchpad = oneVector{$elty}(undef, scratchpad_size)
            $fname(sycl_queue(queue), uplo, n, nrhs, A, lda, B, ldb, scratchpad, scratchpad_size)
            return B
        end
    end
end

# potri
for (bname, fname, elty) in ((:onemklSpotri_scratchpad_size,
:onemklSpotri, :Float32),
                             (:onemklDpotri_scratchpad_size, :onemklDpotri, :Float64),
                             (:onemklCpotri_scratchpad_size, :onemklCpotri, :ComplexF32),
                             (:onemklZpotri_scratchpad_size, :onemklZpotri, :ComplexF64))
    @eval begin
        # potri!: matrix inverse from a Cholesky factorization, overwriting A.
        function potri!(uplo::Char, A::oneStridedMatrix{$elty})
            chkuplo(uplo)
            n = checksquare(A)
            lda = max(1, stride(A, 2))
            queue = global_queue(context(A), device())
            scratchpad_size = $bname(sycl_queue(queue), uplo, n, lda)
            scratchpad = oneVector{$elty}(undef, scratchpad_size)
            $fname(sycl_queue(queue), uplo, n, A, lda, scratchpad, scratchpad_size)
            return A
        end
    end
end

# sytrf: Bunch-Kaufman factorization of a symmetric indefinite matrix.
for (bname, fname, elty) in ((:onemklSsytrf_scratchpad_size, :onemklSsytrf, :Float32),
                             (:onemklDsytrf_scratchpad_size, :onemklDsytrf, :Float64),
                             (:onemklCsytrf_scratchpad_size, :onemklCsytrf, :ComplexF32),
                             (:onemklZsytrf_scratchpad_size, :onemklZsytrf, :ComplexF64))
    @eval begin
        function sytrf!(uplo::Char, A::oneStridedMatrix{$elty}, ipiv::oneStridedVector{Int64})
            chkuplo(uplo)
            n = checksquare(A)
            lda = max(1, stride(A, 2))
            queue = global_queue(context(A), device())
            scratchpad_size = $bname(sycl_queue(queue), uplo, n, lda)
            scratchpad = oneVector{$elty}(undef, scratchpad_size)
            $fname(sycl_queue(queue), uplo, n, A, lda, ipiv, scratchpad, scratchpad_size)
            return A, ipiv
        end

        function sytrf!(uplo::Char, A::oneStridedMatrix{$elty})
            n = checksquare(A)
            ipiv = oneVector{Int64}(undef, n)
            sytrf!(uplo, A, ipiv)
        end
    end
end

# getrf: LU factorization with partial pivoting.
for (bname, fname, elty) in ((:onemklSgetrf_scratchpad_size, :onemklSgetrf, :Float32),
                             (:onemklDgetrf_scratchpad_size, :onemklDgetrf, :Float64),
                             (:onemklCgetrf_scratchpad_size, :onemklCgetrf, :ComplexF32),
                             (:onemklZgetrf_scratchpad_size, :onemklZgetrf, :ComplexF64))
    @eval begin
        function getrf!(A::oneStridedMatrix{$elty})
            m, n = size(A)
            ipiv = oneVector{Int64}(undef, min(m, n))
            getrf!(A, ipiv)
        end

        function getrf!(A::oneStridedMatrix{$elty}, ipiv::oneStridedVector{Int64})
            m,n = size(A)
            lda = max(1, stride(A, 2))
            queue = global_queue(context(A), device())
            scratchpad_size = $bname(sycl_queue(queue), m, n, lda)
            scratchpad = oneVector{$elty}(undef, scratchpad_size)
            $fname(sycl_queue(queue), m, n, A, lda, ipiv, scratchpad, scratchpad_size)
            return A, ipiv
        end
    end
end

# getrs: solve op(A)*X = B from an LU factorization, overwriting B.
for (bname, fname, elty) in ((:onemklSgetrs_scratchpad_size, :onemklSgetrs, :Float32),
                             (:onemklDgetrs_scratchpad_size, :onemklDgetrs, :Float64),
                             (:onemklCgetrs_scratchpad_size, :onemklCgetrs, :ComplexF32),
                             (:onemklZgetrs_scratchpad_size, :onemklZgetrs, :ComplexF64))
    @eval begin
        function getrs!(trans::Char, A::oneStridedMatrix{$elty},
                        ipiv::oneStridedVector{Int64}, B::oneStridedVecOrMat{$elty})
            # Support transa = 'C' for real matrices
            trans = $elty <: Real && trans == 'C' ? 'T' : trans
            chktrans(trans)
            n = checksquare(A)
            if size(B, 1) != n
                throw(DimensionMismatch("first dimension of B, $(size(B,1)), must match dimension of A, $n"))
            end
            if length(ipiv) != n
                throw(DimensionMismatch("length of ipiv, $(length(ipiv)), must match dimension of A, $n"))
            end
            nrhs = size(B, 2)
            lda = max(1, stride(A, 2))
            ldb = max(1, stride(B, 2))
            queue = global_queue(context(A), device())
            scratchpad_size = $bname(sycl_queue(queue), trans, n, nrhs, lda, ldb)
            # BUGFIX: every other wrapper here treats the scratchpad size as a count of
            # $elty elements; this one allocated oneVector{UInt8}, under-sizing the
            # workspace by sizeof($elty).
            scratchpad = oneVector{$elty}(undef, scratchpad_size)
            $fname(sycl_queue(queue), trans, n, nrhs, A, lda, ipiv, B, ldb,
                   scratchpad, scratchpad_size)
            return B
        end
    end
end

# getri: matrix inverse from an LU factorization, overwriting A.
for (bname, fname, elty) in ((:onemklSgetri_scratchpad_size, :onemklSgetri, :Float32),
                             (:onemklDgetri_scratchpad_size, :onemklDgetri, :Float64),
                             (:onemklCgetri_scratchpad_size, :onemklCgetri, :ComplexF32),
                             (:onemklZgetri_scratchpad_size, :onemklZgetri, :ComplexF64))
    @eval begin
        function getri!(A::oneStridedMatrix{$elty}, ipiv::oneStridedVector{Int64})
            n = checksquare(A)
            lda = max(1, stride(A, 2))
            queue = global_queue(context(A), device())
            scratchpad_size = $bname(sycl_queue(queue), n, lda)
            scratchpad = oneVector{$elty}(undef, scratchpad_size)
            $fname(sycl_queue(queue), n, A, lda, ipiv, scratchpad, scratchpad_size)
            return A
        end
    end
end

# geqrf
for (bname, fname, elty) in
((:onemklSgeqrf_scratchpad_size, :onemklSgeqrf, :Float32),
     (:onemklDgeqrf_scratchpad_size, :onemklDgeqrf, :Float64),
     (:onemklCgeqrf_scratchpad_size, :onemklCgeqrf, :ComplexF32),
     (:onemklZgeqrf_scratchpad_size, :onemklZgeqrf, :ComplexF64))
    @eval begin
        # geqrf!: QR factorization; reflectors stored in A, scalar factors in tau.
        function geqrf!(A::oneStridedMatrix{$elty})
            m, n = size(A)
            tau = oneVector{$elty}(undef, min(m, n))
            geqrf!(A, tau)
        end

        function geqrf!(A::oneStridedMatrix{$elty}, tau::oneVector{$elty})
            m,n = size(A)
            lda = max(1, stride(A, 2))
            queue = global_queue(context(A), device())
            scratchpad_size = $bname(sycl_queue(queue), m, n, lda)
            scratchpad = oneVector{$elty}(undef, scratchpad_size)
            $fname(sycl_queue(queue), m, n, A, lda, tau, scratchpad, scratchpad_size)
            return A, tau
        end
    end
end

# ormqr and unmqr: multiply C in-place by Q (or its transpose/adjoint) from a
# QR factorization produced by geqrf!.
for (bname, fname, elty) in ((:onemklSormqr_scratchpad_size, :onemklSormqr, :Float32),
                             (:onemklDormqr_scratchpad_size, :onemklDormqr, :Float64),
                             (:onemklCunmqr_scratchpad_size, :onemklCunmqr, :ComplexF32),
                             (:onemklZunmqr_scratchpad_size, :onemklZunmqr, :ComplexF64))
    @eval begin
        function ormqr!(side::Char, trans::Char, A::oneStridedMatrix{$elty},
                        tau::oneStridedVector{$elty}, C::oneStridedVecOrMat{$elty})
            trans = ($elty <: Real && trans == 'C') ? 'T' : trans
            chkside(side)
            chktrans(trans)
            m, n = (ndims(C) == 2) ? size(C) : (size(C, 1), 1)
            k = length(tau)
            mA = size(A, 1)
            # BUGFIX: the error messages interpolated the wrong values — the
            # right-sided check compares n (not m) against mA, and mA is the
            # FIRST dimension of A.
            side == 'L' && m != mA && throw(DimensionMismatch(
                "for a left-sided multiplication, the first dimension of C, $m, must equal the first dimension of A, $mA"))
            side == 'R' && n != mA && throw(DimensionMismatch(
                "for a right-sided multiplication, the second dimension of C, $n, must equal the first dimension of A, $mA"))
            side == 'L' && k > m && throw(DimensionMismatch(
                "invalid number of reflectors: k = $k should be ≤ m = $m"))
            side == 'R' && k > n && throw(DimensionMismatch(
                "invalid number of reflectors: k = $k should be ≤ n = $n"))
            lda = max(1, stride(A, 2))
            ldc = max(1, stride(C, 2))
            queue = global_queue(context(A), device())
            scratchpad_size = $bname(sycl_queue(queue), side, trans, m, n, k, lda, ldc)
            scratchpad = oneVector{$elty}(undef, scratchpad_size)
            $fname(sycl_queue(queue), side, trans, m, n, k, A, lda, tau, C, ldc,
                   scratchpad, scratchpad_size)
            return C
        end
    end
end

## orgqr and ungqr: form the explicit Q factor from geqrf! output, overwriting A.
for (bname, fname, elty) in ((:onemklSorgqr_scratchpad_size, :onemklSorgqr, :Float32),
                             (:onemklDorgqr_scratchpad_size, :onemklDorgqr, :Float64),
                             (:onemklCungqr_scratchpad_size, :onemklCungqr, :ComplexF32),
                             (:onemklZungqr_scratchpad_size, :onemklZungqr, :ComplexF64))
    @eval begin
        function orgqr!(A::oneStridedMatrix{$elty}, tau::oneStridedVector{$elty})
            m, n = size(A)
            lda = max(1, stride(A, 2))
            k = length(tau)
            queue = global_queue(context(A), device())
            scratchpad_size = $bname(sycl_queue(queue), m, n, k, lda)
            scratchpad = oneVector{$elty}(undef, scratchpad_size)
            $fname(sycl_queue(queue), m, n, k, A, lda, tau, scratchpad, scratchpad_size)
            return A
        end
    end
end

# gebrd
for (bname, fname, elty, relty) in ((:onemklSgebrd_scratchpad_size, :onemklSgebrd, :Float32, :Float32),
                                    (:onemklDgebrd_scratchpad_size, :onemklDgebrd, :Float64, :Float64),
                                    (:onemklCgebrd_scratchpad_size, :onemklCgebrd, :ComplexF32, :Float32),
                                    (:onemklZgebrd_scratchpad_size, :onemklZgebrd, :ComplexF64, :Float64))
    @eval begin
        function
gebrd!(A::oneStridedMatrix{$elty})
            # Bidiagonal reduction: returns A (overwritten with reflectors),
            # diagonal D, off-diagonal E, and reflector scalars tauq/taup.
            m, n = size(A)
            lda = max(1, stride(A, 2))
            k = min(m, n)
            D = oneVector{$relty}(undef, k)
            E = oneVector{$relty}(undef, k-1)
            tauq = oneVector{$elty}(undef, k)
            taup = oneVector{$elty}(undef, k)
            queue = global_queue(context(A), device())
            scratchpad_size = $bname(sycl_queue(queue), m, n, lda)
            scratchpad = oneVector{$elty}(undef, scratchpad_size)
            $fname(sycl_queue(queue), m, n, A, lda, D, E, tauq, taup, scratchpad, scratchpad_size)
            A, D, E, tauq, taup
        end
    end
end

# gesvd: singular value decomposition, A = U * Diagonal(S) * Vt.
for (bname, fname, elty, relty) in ((:onemklSgesvd_scratchpad_size, :onemklSgesvd, :Float32, :Float32),
                                    (:onemklDgesvd_scratchpad_size, :onemklDgesvd, :Float64, :Float64),
                                    (:onemklCgesvd_scratchpad_size, :onemklCgesvd, :ComplexF32, :Float32),
                                    (:onemklZgesvd_scratchpad_size, :onemklZgesvd, :ComplexF64, :Float64))
    @eval begin
        function gesvd!(jobu::Char, jobvt::Char, A::oneStridedMatrix{$elty})
            m, n = size(A)
            k = min(m, n)
            lda = max(1, stride(A, 2))
            U = if jobu === 'A'
                oneMatrix{$elty}(undef, m, m)
            elseif jobu === 'S'
                oneMatrix{$elty}(undef, m, k)
            elseif jobu === 'N' || jobu === 'O'
                ZE_NULL
            else
                error("jobu must be one of 'A', 'S', 'O', or 'N'")
            end
            ldu = U == ZE_NULL ? 1 : max(1, stride(U, 2))
            S = oneVector{$relty}(undef, k)
            Vt = if jobvt === 'A'
                oneMatrix{$elty}(undef, n, n)
            elseif jobvt === 'S'
                oneMatrix{$elty}(undef, k, n)
            elseif jobvt === 'N' || jobvt === 'O'
                ZE_NULL
            else
                error("jobvt must be one of 'A', 'S', 'O', or 'N'")
            end
            ldvt = Vt == ZE_NULL ? 1 : max(1, stride(Vt, 2))
            queue = global_queue(context(A), device())
            scratchpad_size = $bname(sycl_queue(queue), jobu, jobvt, m, n, lda, ldu, ldvt)
            scratchpad = oneVector{$elty}(undef, scratchpad_size)
            $fname(sycl_queue(queue), jobu, jobvt, m, n, A, lda, S, U, ldu, Vt, ldvt,
                   scratchpad, scratchpad_size)
            return U, S, Vt
        end
    end
end

# syevd and heevd: eigenvalues (jobz = 'N') or full eigen-decomposition
# (jobz = 'V') of a symmetric/Hermitian matrix.
for (jname, bname, fname, elty, relty) in
        ((:syevd!, :onemklSsyevd_scratchpad_size, :onemklSsyevd, :Float32, :Float32),
         (:syevd!, :onemklDsyevd_scratchpad_size, :onemklDsyevd, :Float64, :Float64),
         (:heevd!, :onemklCheevd_scratchpad_size, :onemklCheevd, :ComplexF32, :Float32),
         (:heevd!, :onemklZheevd_scratchpad_size, :onemklZheevd, :ComplexF64, :Float64))
    @eval begin
        function $jname(jobz::Char, uplo::Char, A::oneStridedMatrix{$elty})
            chkuplo(uplo)
            n = checksquare(A)
            lda = max(1, stride(A, 2))
            W = oneVector{$relty}(undef, n)
            queue = global_queue(context(A), device())
            scratchpad_size = $bname(sycl_queue(queue), jobz, uplo, n, lda)
            scratchpad = oneVector{$elty}(undef, scratchpad_size)
            $fname(sycl_queue(queue), jobz, uplo, n, A, lda, W, scratchpad, scratchpad_size)
            if jobz == 'N'
                return W
            elseif jobz == 'V'
                return W, A
            else
                # ROBUSTNESS: an invalid jobz previously fell through and
                # silently returned `nothing`; fail loudly instead.
                throw(ArgumentError("jobz must be 'N' or 'V'"))
            end
        end
    end
end

# sygvd and hegvd: generalized symmetric/Hermitian-definite eigenproblem.
for (jname, bname, fname, elty, relty) in
        ((:sygvd!, :onemklSsygvd_scratchpad_size, :onemklSsygvd, :Float32, :Float32),
         (:sygvd!, :onemklDsygvd_scratchpad_size, :onemklDsygvd, :Float64, :Float64),
         (:hegvd!, :onemklChegvd_scratchpad_size, :onemklChegvd, :ComplexF32, :Float32),
         (:hegvd!, :onemklZhegvd_scratchpad_size, :onemklZhegvd, :ComplexF64, :Float64))
    @eval begin
        function $jname(itype::Int, jobz::Char, uplo::Char,
                        A::oneStridedMatrix{$elty}, B::oneStridedMatrix{$elty})
            chkuplo(uplo)
            nA, nB = checksquare(A, B)
            if nB != nA
                throw(DimensionMismatch("Dimensions of A ($nA, $nA) and B ($nB, $nB) must match!"))
            end
            n = nA
            lda = max(1, stride(A, 2))
            ldb = max(1, stride(B, 2))
            W = oneVector{$relty}(undef, n)
            queue = global_queue(context(A), device())
            scratchpad_size =
$bname(sycl_queue(queue), itype, jobz, uplo, n, lda, ldb)
            scratchpad = oneVector{$elty}(undef, scratchpad_size)
            $fname(sycl_queue(queue), itype, jobz, uplo, n, A, lda, B, ldb, W,
                   scratchpad, scratchpad_size)
            if jobz == 'N'
                return W
            elseif jobz == 'V'
                return W, A, B
            end
        end
    end
end

# potrf_batch: grouped batched Cholesky factorization (one group per matrix).
for (bname, fname, elty) in ((:onemklSpotrf_batch_scratchpad_size, :onemklSpotrf_batch, :Float32),
                             (:onemklDpotrf_batch_scratchpad_size, :onemklDpotrf_batch, :Float64),
                             (:onemklCpotrf_batch_scratchpad_size, :onemklCpotrf_batch, :ComplexF32),
                             (:onemklZpotrf_batch_scratchpad_size, :onemklZpotrf_batch, :ComplexF64))
    @eval begin
        function potrf_batched!(A::Vector{<:oneMatrix{$elty}})
            group_count = length(A)
            group_sizes = ones(Int64, group_count)
            uplo = [ONEMKL_UPLO_LOWER for i=1:group_count]
            n = [checksquare(A[i]) for i=1:group_count]
            lda = [max(1, stride(A[i], 2)) for i=1:group_count]
            Aptrs = unsafe_batch(A)
            queue = global_queue(context(A[1]), device(A[1]))
            scratchpad_size = $bname(sycl_queue(queue), uplo, n, lda, group_count, group_sizes)
            scratchpad = oneVector{$elty}(undef, scratchpad_size)
            $fname(sycl_queue(queue), uplo, n, Aptrs, lda, group_count, group_sizes,
                   scratchpad, scratchpad_size)
            unsafe_free!(Aptrs)
            return A
        end
    end
end

# potrs_batch: batched solves from Cholesky factors; the solutions overwrite B.
for (bname, fname, elty) in ((:onemklSpotrs_batch_scratchpad_size, :onemklSpotrs_batch, :Float32),
                             (:onemklDpotrs_batch_scratchpad_size, :onemklDpotrs_batch, :Float64),
                             (:onemklCpotrs_batch_scratchpad_size, :onemklCpotrs_batch, :ComplexF32),
                             (:onemklZpotrs_batch_scratchpad_size, :onemklZpotrs_batch, :ComplexF64))
    @eval begin
        function potrs_batched!(A::Vector{<:oneMatrix{$elty}}, B::Vector{<:oneMatrix{$elty}})
            group_count = length(A)
            group_sizes = ones(Int64, group_count)
            uplo = [ONEMKL_UPLO_LOWER for i=1:group_count]
            n = [checksquare(A[i]) for i=1:group_count]
            nrhs = [size(B[i], 2) for i=1:group_count]
            lda = [max(1, stride(A[i], 2)) for i=1:group_count]
            ldb = [max(1, stride(B[i], 2)) for i=1:group_count]
            Aptrs = unsafe_batch(A)
            Bptrs = unsafe_batch(B)
            queue = global_queue(context(A[1]), device(A[1]))
            scratchpad_size = $bname(sycl_queue(queue), uplo, n, nrhs, lda, ldb,
                                     group_count, group_sizes)
            scratchpad = oneVector{$elty}(undef, scratchpad_size)
            $fname(sycl_queue(queue), uplo, n, nrhs, Aptrs, lda, Bptrs, ldb,
                   group_count, group_sizes, scratchpad, scratchpad_size)
            unsafe_free!(Aptrs)
            unsafe_free!(Bptrs)
            # BUGFIX: return the solution array B (as potrs! and getrs_batched! do);
            # this previously returned the factor array A.
            return B
        end
    end
end

# getrf_batch: batched LU factorization with pivoting; returns (ipiv, A).
for (bname, fname, elty) in ((:onemklSgetrf_batch_scratchpad_size, :onemklSgetrf_batch, :Float32),
                             (:onemklDgetrf_batch_scratchpad_size, :onemklDgetrf_batch, :Float64),
                             (:onemklCgetrf_batch_scratchpad_size, :onemklCgetrf_batch, :ComplexF32),
                             (:onemklZgetrf_batch_scratchpad_size, :onemklZgetrf_batch, :ComplexF64))
    @eval begin
        function getrf_batched!(A::Vector{<:oneMatrix{$elty}})
            group_count = length(A)
            group_sizes = ones(Int64, group_count)
            m = [size(A[i], 1) for i=1:group_count]
            n = [size(A[i], 2) for i=1:group_count]
            lda = [max(1, stride(A[i], 2)) for i=1:group_count]
            ipiv = [oneVector{Int64}(undef, min(m[i], n[i])) for i=1:group_count]
            Aptrs = unsafe_batch(A)
            ipivptrs = unsafe_batch(ipiv)
            queue = global_queue(context(A[1]), device(A[1]))
            scratchpad_size = $bname(sycl_queue(queue), m, n, lda, group_count, group_sizes)
            scratchpad = oneVector{$elty}(undef, scratchpad_size)
            $fname(sycl_queue(queue), m, n, Aptrs, lda, ipivptrs, group_count, group_sizes,
                   scratchpad, scratchpad_size)
            unsafe_free!(Aptrs)
            unsafe_free!(ipivptrs)
            return ipiv, A
        end
    end
end

# getrs_batch: batched LU solves; the solutions overwrite B.
for (bname, fname, elty) in ((:onemklSgetrs_batch_scratchpad_size, :onemklSgetrs_batch, :Float32),
                             (:onemklDgetrs_batch_scratchpad_size, :onemklDgetrs_batch, :Float64),
                             (:onemklCgetrs_batch_scratchpad_size, :onemklCgetrs_batch, :ComplexF32),
                             (:onemklZgetrs_batch_scratchpad_size, :onemklZgetrs_batch, :ComplexF64))
    @eval begin
        function getrs_batched!(A::Vector{<:oneMatrix{$elty}}, ipiv::Vector{<:oneVector{Int64}},
                                B::Vector{<:oneMatrix{$elty}})
            group_count = length(A)
            group_sizes = ones(Int64, group_count)
trans = [ONEMKL_TRANSPOSE_NONTRANS for i=1:group_count]
            n = [checksquare(A[i]) for i=1:group_count]
            nrhs = [size(B[i], 2) for i=1:group_count]
            lda = [max(1, stride(A[i], 2)) for i=1:group_count]
            ldb = [max(1, stride(B[i], 2)) for i=1:group_count]
            Aptrs = unsafe_batch(A)
            Bptrs = unsafe_batch(B)
            ipivptrs = unsafe_batch(ipiv)
            queue = global_queue(context(A[1]), device(A[1]))
            scratchpad_size = $bname(sycl_queue(queue), trans, n, nrhs, lda, ldb,
                                     group_count, group_sizes)
            scratchpad = oneVector{$elty}(undef, scratchpad_size)
            $fname(sycl_queue(queue), trans, n, nrhs, Aptrs, lda, ipivptrs, Bptrs, ldb,
                   group_count, group_sizes, scratchpad, scratchpad_size)
            unsafe_free!(Aptrs)
            unsafe_free!(Bptrs)
            unsafe_free!(ipivptrs)
            return B
        end
    end
end

# getri_batch: batched matrix inversion from LU factors; returns (ipiv, A).
for (bname, fname, elty) in ((:onemklSgetri_batch_scratchpad_size, :onemklSgetri_batch, :Float32),
                             (:onemklDgetri_batch_scratchpad_size, :onemklDgetri_batch, :Float64),
                             (:onemklCgetri_batch_scratchpad_size, :onemklCgetri_batch, :ComplexF32),
                             (:onemklZgetri_batch_scratchpad_size, :onemklZgetri_batch, :ComplexF64))
    @eval begin
        function getri_batched!(A::Vector{<:oneMatrix{$elty}}, ipiv::Vector{<:oneVector{Int64}})
            group_count = length(A)
            group_sizes = ones(Int64, group_count)
            n = [checksquare(A[i]) for i=1:group_count]
            lda = [max(1, stride(A[i], 2)) for i=1:group_count]
            Aptrs = unsafe_batch(A)
            ipivptrs = unsafe_batch(ipiv)
            queue = global_queue(context(A[1]), device(A[1]))
            scratchpad_size = $bname(sycl_queue(queue), n, lda, group_count, group_sizes)
            scratchpad = oneVector{$elty}(undef, scratchpad_size)
            $fname(sycl_queue(queue), n, Aptrs, lda, ipivptrs, group_count, group_sizes,
                   scratchpad, scratchpad_size)
            unsafe_free!(Aptrs)
            unsafe_free!(ipivptrs)
            return ipiv, A
        end
    end
end

# geqrf_batch: batched QR factorization; returns (tau, A).
for (bname, fname, elty) in ((:onemklSgeqrf_batch_scratchpad_size, :onemklSgeqrf_batch, :Float32),
                             (:onemklDgeqrf_batch_scratchpad_size, :onemklDgeqrf_batch, :Float64),
                             (:onemklCgeqrf_batch_scratchpad_size, :onemklCgeqrf_batch, :ComplexF32),
                             (:onemklZgeqrf_batch_scratchpad_size, :onemklZgeqrf_batch, :ComplexF64))
    @eval begin
        function geqrf_batched!(A::Vector{<:oneMatrix{$elty}})
            group_count = length(A)
            group_sizes = ones(Int64, group_count)
            m = [size(A[i], 1) for i=1:group_count]
            n = [size(A[i], 2) for i=1:group_count]
            lda = [max(1, stride(A[i], 2)) for i=1:group_count]
            tau = [oneVector{$elty}(undef, min(m[i], n[i])) for i=1:group_count]
            Aptrs = unsafe_batch(A)
            tauptrs = unsafe_batch(tau)
            queue = global_queue(context(A[1]), device(A[1]))
            scratchpad_size = $bname(sycl_queue(queue), m, n, lda, group_count, group_sizes)
            scratchpad = oneVector{$elty}(undef, scratchpad_size)
            $fname(sycl_queue(queue), m, n, Aptrs, lda, tauptrs, group_count, group_sizes,
                   scratchpad, scratchpad_size)
            unsafe_free!(Aptrs)
            unsafe_free!(tauptrs)
            return tau, A
        end
    end
end

# orgqr_batch and ungqr_batch: batched explicit-Q formation from geqrf_batched!.
for (bname, fname, elty) in ((:onemklSorgqr_batch_scratchpad_size, :onemklSorgqr_batch, :Float32),
                             (:onemklDorgqr_batch_scratchpad_size, :onemklDorgqr_batch, :Float64),
                             (:onemklCungqr_batch_scratchpad_size, :onemklCungqr_batch, :ComplexF32),
                             (:onemklZungqr_batch_scratchpad_size, :onemklZungqr_batch, :ComplexF64))
    @eval begin
        function orgqr_batched!(A::Vector{<:oneMatrix{$elty}}, tau::Vector{<:oneVector{$elty}})
            group_count = length(A)
            group_sizes = ones(Int64, group_count)
            m = [size(A[i], 1) for i=1:group_count]
            n = [size(A[i], 2) for i=1:group_count]
            k = [min(m[i], n[i]) for i=1:group_count]
            lda = [max(1, stride(A[i], 2)) for i=1:group_count]
            Aptrs = unsafe_batch(A)
            tauptrs = unsafe_batch(tau)
            queue = global_queue(context(A[1]), device(A[1]))
            scratchpad_size = $bname(sycl_queue(queue), m, n, k, lda, group_count, group_sizes)
            scratchpad = oneVector{$elty}(undef, scratchpad_size)
            $fname(sycl_queue(queue), m, n, k, Aptrs, lda, tauptrs, group_count, group_sizes,
                   scratchpad, scratchpad_size)
            unsafe_free!(Aptrs)
            unsafe_free!(tauptrs)
            return A
        end
    end
end

# LAPACK
for elty in (:Float32, :Float64, :ComplexF32, :ComplexF64)
    @eval begin
LinearAlgebra.LAPACK.potrf!(uplo::Char, A::oneStridedMatrix{$elty}) = oneMKL.potrf!(uplo, A)
        LinearAlgebra.LAPACK.potrs!(uplo::Char, A::oneStridedMatrix{$elty}, B::oneStridedVecOrMat{$elty}) = oneMKL.potrs!(uplo, A, B)
        LinearAlgebra.LAPACK.sytrf!(uplo::Char, A::oneStridedMatrix{$elty}) = oneMKL.sytrf!(uplo, A)
        LinearAlgebra.LAPACK.sytrf!(uplo::Char, A::oneStridedMatrix{$elty}, ipiv::oneStridedVector{Int64}) = oneMKL.sytrf!(uplo, A, ipiv)
        LinearAlgebra.LAPACK.geqrf!(A::oneStridedMatrix{$elty}) = oneMKL.geqrf!(A)
        LinearAlgebra.LAPACK.geqrf!(A::oneStridedMatrix{$elty}, tau::oneStridedVector{$elty}) = oneMKL.geqrf!(A, tau)
        LinearAlgebra.LAPACK.getrf!(A::oneStridedMatrix{$elty}) = oneMKL.getrf!(A)
        LinearAlgebra.LAPACK.getrf!(A::oneStridedMatrix{$elty}, ipiv::oneStridedVector{Int64}) = oneMKL.getrf!(A, ipiv)
        LinearAlgebra.LAPACK.getrs!(trans::Char, A::oneStridedMatrix{$elty}, ipiv::oneStridedVector{Int64}, B::oneStridedVecOrMat{$elty}) = oneMKL.getrs!(trans, A, ipiv, B)
        LinearAlgebra.LAPACK.ormqr!(side::Char, trans::Char, A::oneStridedMatrix{$elty}, tau::oneStridedVector{$elty}, C::oneStridedVecOrMat{$elty}) = oneMKL.ormqr!(side, trans, A, tau, C)
        LinearAlgebra.LAPACK.orgqr!(A::oneStridedMatrix{$elty}, tau::oneStridedVector{$elty}) = oneMKL.orgqr!(A, tau)
        LinearAlgebra.LAPACK.gebrd!(A::oneStridedMatrix{$elty}) = oneMKL.gebrd!(A)
        LinearAlgebra.LAPACK.gesvd!(jobu::Char, jobvt::Char, A::oneStridedMatrix{$elty}) = oneMKL.gesvd!(jobu, jobvt, A)
    end
end

# Real symmetric eigen-solvers map onto syevd!/sygvd!.
for elty in (:Float32, :Float64)
    @eval begin
        LinearAlgebra.LAPACK.syev!(jobz::Char, uplo::Char, A::oneStridedMatrix{$elty}) = oneMKL.syevd!(jobz, uplo, A)
        LinearAlgebra.LAPACK.sygvd!(itype::Int, jobz::Char, uplo::Char, A::oneStridedMatrix{$elty}, B::oneStridedMatrix{$elty}) = oneMKL.sygvd!(itype, jobz, uplo, A, B)
    end
end

# Complex Hermitian eigen-solvers map onto heevd!/hegvd!.
for elty in (:ComplexF32, :ComplexF64)
    @eval begin
        LinearAlgebra.LAPACK.syev!(jobz::Char, uplo::Char, A::oneStridedMatrix{$elty}) = oneMKL.heevd!(jobz, uplo, A)
        LinearAlgebra.LAPACK.sygvd!(itype::Int, jobz::Char, uplo::Char, A::oneStridedMatrix{$elty}, B::oneStridedMatrix{$elty}) = oneMKL.hegvd!(itype, jobz, uplo, A, B)
    end
end

for elty in (:Float32, :Float64)
    @eval begin
        LinearAlgebra.LAPACK.syevd!(jobz::Char, uplo::Char, A::oneStridedMatrix{$elty}) = oneMKL.syevd!(jobz, uplo, A)
    end
end

for elty in (:ComplexF32, :ComplexF64)
    @eval begin
        LinearAlgebra.LAPACK.syevd!(jobz::Char, uplo::Char, A::oneStridedMatrix{$elty}) = oneMKL.heevd!(jobz, uplo, A)
    end
end

================================================ FILE: lib/mkl/wrappers_sparse.jl ================================================

# Deferred release queue for sparse matrix handles.
# Finalizers run on the GC thread, but onemklXsparse_release_matrix_handle submits
# work to the SYCL queue. Using the same queue from the GC thread and the main thread
# concurrently is not safe and causes ZE_RESULT_ERROR_DEVICE_LOST / ZE_RESULT_ERROR_UNKNOWN.
# Instead, finalizers push handles here and they are released on the main thread.
const _deferred_sparse_handles = Vector{matrix_handle_t}()
const _deferred_sparse_handles_lock = ReentrantLock()

# Finalizer hook: enqueue the handle for later release instead of touching the
# SYCL queue from the GC thread. Matrices built empty have `handle === nothing`
# and need no release.
function sparse_release_matrix_handle(A::oneAbstractSparseMatrix)
    return if A.handle !== nothing
        lock(_deferred_sparse_handles_lock) do
            push!(_deferred_sparse_handles, A.handle)
        end
    end
end

# Drain the deferred-release queue on the calling (main) thread, then
# synchronize the queue so the releases have completed.
function flush_deferred_sparse_releases()
    handles = lock(_deferred_sparse_handles_lock) do
        if isempty(_deferred_sparse_handles)
            return matrix_handle_t[]
        end
        h = copy(_deferred_sparse_handles)
        empty!(_deferred_sparse_handles)
        return h
    end
    isempty(handles) && return
    dev = device()
    ctx = context()
    queue = global_queue(ctx, dev)
    for handle in handles
        try
            handle_ptr = Ref{matrix_handle_t}(handle)
            onemklXsparse_release_matrix_handle(sycl_queue(queue), handle_ptr)
        catch err
            # Best-effort: releasing one handle must not abort the others.
            @warn "Error releasing sparse matrix handle" exception = err
        end
    end
    return synchronize(queue)
end

for (fname, elty, intty) in ((:onemklSsparse_set_csr_data , :Float32 , :Int32),
                             (:onemklSsparse_set_csr_data_64, :Float32 , :Int64),
(:onemklDsparse_set_csr_data , :Float64 , :Int32),
                             (:onemklDsparse_set_csr_data_64, :Float64 , :Int64),
                             (:onemklCsparse_set_csr_data , :ComplexF32, :Int32),
                             (:onemklCsparse_set_csr_data_64, :ComplexF32, :Int64),
                             (:onemklZsparse_set_csr_data , :ComplexF64, :Int32),
                             (:onemklZsparse_set_csr_data_64, :ComplexF64, :Int64))
    @eval begin
        # Build a device CSR matrix from device vectors.
        function oneSparseMatrixCSR(rowPtr::oneVector{$intty}, colVal::oneVector{$intty},
                                    nzVal::oneVector{$elty}, dims::NTuple{2, Int})
            flush_deferred_sparse_releases()
            handle_ptr = Ref{matrix_handle_t}()
            onemklXsparse_init_matrix_handle(handle_ptr)
            m, n = dims
            nnzA = length(nzVal)
            queue = global_queue(context(nzVal), device(nzVal))
            # Don't update handle if matrix is empty
            if m != 0 && n != 0
                $fname(sycl_queue(queue), handle_ptr[], m, n, 'O', rowPtr, colVal, nzVal)
                dA = oneSparseMatrixCSR{$elty, $intty}(handle_ptr[], rowPtr, colVal, nzVal, (m, n), nnzA)
                finalizer(sparse_release_matrix_handle, dA)
            else
                dA = oneSparseMatrixCSR{$elty, $intty}(nothing, rowPtr, colVal, nzVal, (m, n), nnzA)
            end
            return dA
        end

        # Build a device CSC matrix from device vectors.
        function oneSparseMatrixCSC(colPtr::oneVector{$intty}, rowVal::oneVector{$intty},
                                    nzVal::oneVector{$elty}, dims::NTuple{2, Int})
            flush_deferred_sparse_releases()
            queue = global_queue(context(nzVal), device(nzVal))
            handle_ptr = Ref{matrix_handle_t}()
            onemklXsparse_init_matrix_handle(handle_ptr)
            m, n = dims
            nnzA = length(nzVal)
            # Don't update handle if matrix is empty
            if m != 0 && n != 0
                # CSC of A is CSR of Aᵀ, hence the swapped (n, m) extents.
                $fname(sycl_queue(queue), handle_ptr[], n, m, 'O', colPtr, rowVal, nzVal)
                dA = oneSparseMatrixCSC{$elty, $intty}(handle_ptr[], colPtr, rowVal, nzVal, (m, n), nnzA)
                finalizer(sparse_release_matrix_handle, dA)
            else
                dA = oneSparseMatrixCSC{$elty, $intty}(nothing, colPtr, rowVal, nzVal, (m, n), nnzA)
            end
            return dA
        end

        # Upload a CPU CSC matrix as device CSR (via its transpose).
        function oneSparseMatrixCSR(A::SparseMatrixCSC{$elty, $intty})
            m, n = size(A)
            At = SparseMatrixCSC(A |> transpose)
            rowPtr = oneVector{$intty}(At.colptr)
            colVal = oneVector{$intty}(At.rowval)
            nzVal = oneVector{$elty}(At.nzval)
            return oneSparseMatrixCSR(rowPtr, colVal, nzVal, (m, n))
        end

        # Download a device CSR matrix to a CPU SparseMatrixCSC.
        function SparseArrays.SparseMatrixCSC(A::oneSparseMatrixCSR{$elty, $intty})
            # CLEANUP: removed an unused `handle_ptr = Ref{matrix_handle_t}()` local.
            At = SparseMatrixCSC(reverse(A.dims)..., Vector(A.rowPtr), Vector(A.colVal), Vector(A.nzVal))
            A_csc = SparseMatrixCSC(At |> transpose)
            return A_csc
        end

        # Upload a CPU CSC matrix as device CSC.
        function oneSparseMatrixCSC(A::SparseMatrixCSC{$elty, $intty})
            m, n = size(A)
            colPtr = oneVector{$intty}(A.colptr)
            rowVal = oneVector{$intty}(A.rowval)
            nzVal = oneVector{$elty}(A.nzval)
            return oneSparseMatrixCSC(colPtr, rowVal, nzVal, (m, n))
        end

        # Download a device CSC matrix to a CPU SparseMatrixCSC.
        function SparseArrays.SparseMatrixCSC(A::oneSparseMatrixCSC{$elty, $intty})
            # CLEANUP: removed an unused `handle_ptr` local.
            A_csc = SparseMatrixCSC(A.dims..., Vector(A.colPtr), Vector(A.rowVal), Vector(A.nzVal))
            return A_csc
        end
    end
end

for (fname, elty, intty) in ((:onemklSsparse_set_coo_data , :Float32 , :Int32),
                             (:onemklSsparse_set_coo_data_64, :Float32 , :Int64),
                             (:onemklDsparse_set_coo_data , :Float64 , :Int32),
                             (:onemklDsparse_set_coo_data_64, :Float64 , :Int64),
                             (:onemklCsparse_set_coo_data , :ComplexF32, :Int32),
                             (:onemklCsparse_set_coo_data_64, :ComplexF32, :Int64),
                             (:onemklZsparse_set_coo_data , :ComplexF64, :Int32),
                             (:onemklZsparse_set_coo_data_64, :ComplexF64, :Int64))
    @eval begin
        # Upload a CPU CSC matrix as a device COO matrix.
        function oneSparseMatrixCOO(A::SparseMatrixCSC{$elty, $intty})
            flush_deferred_sparse_releases()
            handle_ptr = Ref{matrix_handle_t}()
            onemklXsparse_init_matrix_handle(handle_ptr)
            m, n = size(A)
            row, col, val = findnz(A)
            rowInd = oneVector{$intty}(row)
            colInd = oneVector{$intty}(col)
            nzVal = oneVector{$elty}(val)
            nnzA = length(val)
            queue = global_queue(context(nzVal), device(nzVal))
            if m != 0 && n != 0
                $fname(sycl_queue(queue), handle_ptr[], m, n, nnzA, 'O', rowInd, colInd, nzVal)
                dA = oneSparseMatrixCOO{$elty, $intty}(handle_ptr[], rowInd, colInd, nzVal, (m, n), nnzA)
                finalizer(sparse_release_matrix_handle, dA)
            else
                dA = oneSparseMatrixCOO{$elty, $intty}(nothing, rowInd, colInd, nzVal, (m, n), nnzA)
            end
            return dA
        end

        function
SparseArrays.SparseMatrixCSC(A::oneSparseMatrixCOO{$elty, $intty}) handle_ptr = Ref{matrix_handle_t}() A = sparse(Vector(A.rowInd), Vector(A.colInd), Vector(A.nzVal), A.dims...) return A end end end for SparseMatrix in (:oneSparseMatrixCSR, :oneSparseMatrixCOO) for (fname, elty) in ((:onemklSsparse_gemv, :Float32), (:onemklDsparse_gemv, :Float64), (:onemklCsparse_gemv, :ComplexF32), (:onemklZsparse_gemv, :ComplexF64)) @eval begin function sparse_gemv!(trans::Char, alpha::Number, A::$SparseMatrix{$elty}, x::oneStridedVector{$elty}, beta::Number, y::oneStridedVector{$elty}) queue = global_queue(context(x), device(x)) $fname(sycl_queue(queue), trans, alpha, A.handle, x, beta, y) y end end end @eval begin function sparse_optimize_gemv!(trans::Char, A::$SparseMatrix) queue = global_queue(context(A.nzVal), device(A.nzVal)) onemklXsparse_optimize_gemv(sycl_queue(queue), trans, A.handle) return A end end end for SparseMatrix in (:oneSparseMatrixCSC,) # CSC(A) is represented by storing CSR(A^T). Map operations accordingly: # - trans = 'N': want A*x -> use op(S)='T' with S=A^T. # - trans = 'T': want A^T*x -> use op(S)='N' with S=A^T. # - trans = 'C': want A^H*x. # * For real eltypes, A^H == A^T -> use op(S)='N'. # * For complex eltypes, we cannot express A^H using a single op(S). # Use identity: conj(y_new) = conj(alpha) * A * conj(x) + conj(beta) * conj(y) # and compute with op(S)='T' (since S^T = A), conjugating x and y around the call. 
# Real eltypes: A^H == A^T, so flip_trans suffices for every op.
for (fname, elty) in ((:onemklSsparse_gemv, :Float32),
                      (:onemklDsparse_gemv, :Float64))
    @eval begin
        # y = alpha * op(A) * x + beta * y for a CSC matrix with real eltype.
        function sparse_gemv!(trans::Char, alpha::Number, A::$SparseMatrix{$elty},
                              x::oneStridedVector{$elty}, beta::Number,
                              y::oneStridedVector{$elty})
            queue = global_queue(context(x), device(x))
            m, n = size(A)
            # Empty matrices carry no library handle (constructors store
            # `nothing`), so skip the MKL call entirely.
            if m != 0 && n != 0
                $fname(sycl_queue(queue), flip_trans(trans), alpha, A.handle, x, beta, y)
            end
            y
        end
    end
end

# Special handling for CSC matrices since they are stored as transposed CSR
for (fname, elty) in (
        (:onemklCsparse_gemv, :ComplexF32),
        (:onemklZsparse_gemv, :ComplexF64),
    )
    @eval begin
        function sparse_gemv!(
                trans::Char, alpha::Number, A::$SparseMatrix{$elty},
                x::oneStridedVector{$elty}, beta::Number, y::oneStridedVector{$elty}
            )
            # Compute A^H*x via identity:
            #   conj(y_new) = conj(alpha) * (A^T) * conj(x) + conj(beta) * conj(y)
            # Since S=A^T and op='N' computes S*x = A^T*x, we can realize this with one call.
            if trans == 'C'
                y .= conj.(y)
                x .= conj.(x)
                alpha = conj(alpha)
                beta = conj(beta)
            end
            queue = global_queue(context(x), device(x))
            m, n = size(A)
            # Consistent with the real-eltype method above: empty matrices have
            # no library handle (A.handle === nothing), so skip the MKL call.
            if m != 0 && n != 0
                $fname(sycl_queue(queue), flip_trans(trans), alpha, A.handle, x, beta, y)
            end
            if trans == 'C'
                y .= conj.(y)
                # Restore x
                x .= conj.(x)
            end
            return y
        end
    end
end

@eval begin
    function sparse_optimize_gemv!(trans::Char, A::$SparseMatrix)
        # complex 'C' case is implemented using op='N' on S=A^T with conjugation trick
        queue = global_queue(context(A.nzVal), device(A.nzVal))
        onemklXsparse_optimize_gemv(sycl_queue(queue), flip_trans(trans), A.handle)
        return A
    end
end
end

for (fname, elty) in ((:onemklSsparse_gemm, :Float32),
                      (:onemklDsparse_gemm, :Float64),
                      (:onemklCsparse_gemm, :ComplexF32),
                      (:onemklZsparse_gemm, :ComplexF64),
                     )
    @eval begin
        # C = alpha * op(A) * op(B) + beta * C for a device CSR matrix A.
        function sparse_gemm!(transa::Char, transb::Char, alpha::Number,
                              A::oneSparseMatrixCSR{$elty}, B::oneStridedMatrix{$elty},
                              beta::Number, C::oneStridedMatrix{$elty}
                             )
            mB, nB = size(B)
            mC, nC = size(C)
            (nB != nC) && (transb == 'N') && throw(ArgumentError("B and C must have the same number of columns."))
(mB != nC) && (transb != 'N') && throw(ArgumentError("Bᵀ and C must have the same number of columns.")) nrhs = size(B, 2) ldb = max(1,stride(B,2)) ldc = max(1,stride(C,2)) queue = global_queue(context(C), device(C)) $fname(sycl_queue(queue), 'C', transa, transb, alpha, A.handle, B, nrhs, ldb, beta, C, ldc) C end end end function sparse_optimize_gemm!(trans::Char, A::oneSparseMatrixCSR) queue = global_queue(context(A.nzVal), device(A.nzVal)) onemklXsparse_optimize_gemm(sycl_queue(queue), trans, A.handle) return A end function sparse_optimize_gemm!(trans::Char, transB::Char, nrhs::Int, A::oneSparseMatrixCSR) queue = global_queue(context(A.nzVal), device(A.nzVal)) onemklXsparse_optimize_gemm_advanced(sycl_queue(queue), 'C', trans, transB, A.handle, nrhs) return A end for (fname, elty) in ((:onemklSsparse_gemm, :Float32), (:onemklDsparse_gemm, :Float64)) @eval begin function sparse_gemm!(transa::Char, transb::Char, alpha::Number, A::oneSparseMatrixCSC{$elty}, B::oneStridedMatrix{$elty}, beta::Number, C::oneStridedMatrix{$elty}) mB, nB = size(B) mC, nC = size(C) (nB != nC) && (transb == 'N') && throw(ArgumentError("B and C must have the same number of columns.")) (mB != nC) && (transb != 'N') && throw(ArgumentError("Bᵀ and C must have the same number of columns.")) nrhs = size(B, 2) ldb = max(1,stride(B,2)) ldc = max(1,stride(C,2)) queue = global_queue(context(C), device(C)) $fname(sycl_queue(queue), 'C', flip_trans(transa), transb, alpha, A.handle, B, nrhs, ldb, beta, C, ldc) C end end end # Special handling for CSC matrices since they are stored as transposed CSR (S = A^T) for (fname, elty) in ( (:onemklCsparse_gemm, :ComplexF32), (:onemklZsparse_gemm, :ComplexF64), ) @eval begin function sparse_gemm!( transa::Char, transb::Char, alpha::Number, A::oneSparseMatrixCSC{$elty}, B::oneStridedMatrix{$elty}, beta::Number, C::oneStridedMatrix{$elty} ) # Map op(A) to op(S) where S = A^T stored as CSR in the handle # transa: 'N' -> op(S)='T'; 'T' -> op(S)='N'; 'C' -> # real: 
op(S)='N' (since A^H == A^T) # complex: use conjugation identity on B and C with op(S)='N' mB, nB = size(B) mC, nC = size(C) (nB != nC) && (transb == 'N') && throw(ArgumentError("B and C must have the same number of columns.")) (mB != nC) && (transb != 'N') && throw(ArgumentError("Bᵀ and C must have the same number of columns.")) nrhs = size(B, 2) ldb = max(1, stride(B, 2)) ldc = max(1, stride(C, 2)) queue = global_queue(context(C), device(C)) # Use identity: conj(C_new) = conj(alpha) * S * conj(opB(B)) + conj(beta) * conj(C) # Prepare conj(C) in-place and conj(B) into a temporary if needed # Determine how to supply opB under conjugation # - transb == 'N': pass transb='N' and use conj(B) # - transb == 'T': pass transb='T' and use conj(B) # - transb == 'C': since conj(B^H) = B^T, pass transb='T' and use B as-is local transb_eff local Beff if transa == 'C' C .= conj.(C) alpha = conj(alpha) beta = conj(beta) if transb == 'N' transb_eff = 'N' # Beff = similar(B) B .= conj.(B) elseif transb == 'T' transb_eff = 'T' # Beff = similar(B) B .= conj.(B) else # transb == 'C' transb_eff = 'T' end else transb_eff = transb end $fname(sycl_queue(queue), 'C', flip_trans(transa), transb_eff, alpha, A.handle, B, nrhs, ldb, beta, C, ldc) # Undo conjugation to obtain C_new if transa == 'C' C .= conj.(C) if transb == 'N' || transb == 'T' # Restore B B .= conj.(B) end end return C end end end function sparse_optimize_gemm!(trans::Char, A::oneSparseMatrixCSC) queue = global_queue(context(A.nzVal), device(A.nzVal)) onemklXsparse_optimize_gemm(sycl_queue(queue), flip_trans(trans), A.handle) return A end function sparse_optimize_gemm!(trans::Char, transB::Char, nrhs::Int, A::oneSparseMatrixCSC) queue = global_queue(context(A.nzVal), device(A.nzVal)) onemklXsparse_optimize_gemm_advanced(sycl_queue(queue), 'C', flip_trans(trans), transB, A.handle, nrhs) return A end for (fname, elty) in ((:onemklSsparse_symv, :Float32), (:onemklDsparse_symv, :Float64), (:onemklCsparse_symv, :ComplexF32), 
(:onemklZsparse_symv, :ComplexF64))
    @eval begin
        # y = alpha * A * x + beta * y for a symmetric CSR matrix; `uplo`
        # selects which triangle of A the handle's data describes.
        function sparse_symv!(uplo::Char, alpha::Number, A::oneSparseMatrixCSR{$elty},
                              x::oneStridedVector{$elty}, beta::Number,
                              y::oneStridedVector{$elty})
            queue = global_queue(context(y), device(y))
            $fname(sycl_queue(queue), uplo, alpha, A.handle, x, beta, y)
            y
        end
    end
end

for (fname, elty) in ((:onemklSsparse_symv, :Float32),
                      (:onemklDsparse_symv, :Float64),
                      (:onemklCsparse_symv, :ComplexF32),
                      (:onemklZsparse_symv, :ComplexF64),
                      )
    @eval begin
        # CSC variant: the handle stores CSR(Aᵀ), and for a symmetric matrix
        # Aᵀ == A, so only the triangle selector needs flipping.
        function sparse_symv!(uplo::Char, alpha::Number, A::oneSparseMatrixCSC{$elty},
                              x::oneStridedVector{$elty}, beta::Number,
                              y::oneStridedVector{$elty})
            queue = global_queue(context(y), device(y))
            $fname(sycl_queue(queue), flip_uplo(uplo), alpha, A.handle, x, beta, y)
            y
        end
    end
end

for (fname, elty) in ((:onemklSsparse_trmv, :Float32),
                      (:onemklDsparse_trmv, :Float64),
                      (:onemklCsparse_trmv, :ComplexF32),
                      (:onemklZsparse_trmv, :ComplexF64))
    @eval begin
        # y = alpha * op(triangle(A)) * x + beta * y for a triangular CSR
        # matrix; `diag` selects unit/non-unit diagonal.
        function sparse_trmv!(uplo::Char, trans::Char, diag::Char, alpha::Number,
                              A::oneSparseMatrixCSR{$elty}, x::oneStridedVector{$elty},
                              beta::Number, y::oneStridedVector{$elty})
            queue = global_queue(context(y), device(y))
            $fname(sycl_queue(queue), uplo, trans, diag, alpha, A.handle, x, beta, y)
            y
        end
    end
end

# Hint the library to optimize subsequent trmv calls with the given settings.
function sparse_optimize_trmv!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSR)
    queue = global_queue(context(A.nzVal), device(A.nzVal))
    onemklXsparse_optimize_trmv(sycl_queue(queue), uplo, trans, diag, A.handle)
    return A
end

# Special handling for CSC matrices since they are stored as transposed CSR
for (fname, elty) in (
        (:onemklSsparse_trmv, :Float32),
        (:onemklDsparse_trmv, :Float64),
        (:onemklCsparse_trmv, :ComplexF32),
        (:onemklZsparse_trmv, :ComplexF64),
    )
    @eval begin
        function sparse_trmv!(
                uplo::Char, trans::Char, diag::Char, alpha::Number,
                A::oneSparseMatrixCSC{$elty}, x::oneStridedVector{$elty},
                beta::Number, y::oneStridedVector{$elty}
            )
            # Intel oneAPI sparse trmv only supports nontrans operations.
# Since CSC(A) is stored as CSR(A^T), we cannot map CSC operations # to CSR operations for triangular operations without transpose support. throw( ArgumentError( "sparse_trmv! is not supported for oneSparseMatrixCSC due to Intel oneAPI limitations. " * "Intel sparse library only supports nontrans operations for triangular matrix operations. " * "Convert to oneSparseMatrixCSR format instead." ) ) queue = global_queue(context(y), device(y)) $fname(sycl_queue(queue), uplo, flip_trans(trans), diag, alpha, A.handle, x, beta, y) return y end end end function sparse_optimize_trmv!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSC) throw( ArgumentError( "sparse_optimize_trmv! is not supported for oneSparseMatrixCSC due to Intel oneAPI limitations. " * "Intel sparse library only supports nontrans operations for triangular matrix operations. " * "Convert to oneSparseMatrixCSR format instead." ) ) queue = global_queue(context(A.nzVal), device(A.nzVal)) onemklXsparse_optimize_trmv(sycl_queue(queue), uplo, flip_trans(trans), diag, A.handle) return A end for (fname, elty) in ((:onemklSsparse_trsv, :Float32), (:onemklDsparse_trsv, :Float64), (:onemklCsparse_trsv, :ComplexF32), (:onemklZsparse_trsv, :ComplexF64)) @eval begin function sparse_trsv!(uplo::Char, trans::Char, diag::Char, alpha::Number, A::oneSparseMatrixCSR{$elty}, x::oneStridedVector{$elty}, y::oneStridedVector{$elty}) queue = global_queue(context(y), device(y)) $fname(sycl_queue(queue), uplo, trans, diag, alpha, A.handle, x, y) y end end end function sparse_optimize_trsv!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSR) queue = global_queue(context(A.nzVal), device(A.nzVal)) onemklXsparse_optimize_trsv(sycl_queue(queue), uplo, trans, diag, A.handle) return A end for (fname, elty) in ( (:onemklSsparse_trsv, :Float32), (:onemklDsparse_trsv, :Float64), (:onemklCsparse_trsv, :ComplexF32), (:onemklZsparse_trsv, :ComplexF64), ) @eval begin function sparse_trsv!( uplo::Char, trans::Char, diag::Char, 
alpha::Number, A::oneSparseMatrixCSC{$elty}, x::oneStridedVector{$elty}, y::oneStridedVector{$elty} ) throw( ArgumentError( "sparse_trsv! is not supported for oneSparseMatrixCSC due to Intel oneAPI limitations. " * "Intel sparse library only supports nontrans operations for triangular matrix operations. " * "Convert to oneSparseMatrixCSR format instead." ) ) queue = global_queue(context(y), device(y)) onemklXsparse_optimize_trsv(sycl_queue(queue), uplo, flip_trans(trans), diag, A.handle) return A end end end function sparse_optimize_trsv!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSC) throw( ArgumentError( "sparse_optimize_trsv! is not supported for oneSparseMatrixCSC due to Intel oneAPI limitations. " * "Intel sparse library only supports nontrans operations for triangular matrix operations. " * "Convert to oneSparseMatrixCSR format instead." ) ) queue = global_queue(context(A.nzVal), device(A.nzVal)) onemklXsparse_optimize_trsv(sycl_queue(queue), uplo, flip_trans(trans), diag, A.handle) return A end for (fname, elty) in ((:onemklSsparse_trsm, :Float32), (:onemklDsparse_trsm, :Float64), (:onemklCsparse_trsm, :ComplexF32), (:onemklZsparse_trsm, :ComplexF64)) @eval begin function sparse_trsm!(uplo::Char, transA::Char, transX::Char, diag::Char, alpha::Number, A::oneSparseMatrixCSR{$elty}, X::oneStridedMatrix{$elty}, Y::oneStridedMatrix{$elty}) mX, nX = size(X) mY, nY = size(Y) (mX != mY) && (transX == 'N') && throw(ArgumentError("X and Y must have the same number of rows.")) (nX != nY) && (transX == 'N') && throw(ArgumentError("X and Y must have the same number of columns.")) (nX != mY) && (transX != 'N') && throw(ArgumentError("Xᵀ and Y must have the same number of rows.")) (mX != nY) && (transX != 'N') && throw(ArgumentError("Xᵀ and Y must have the same number of columns.")) nrhs = size(X, 2) ldx = max(1,stride(X,2)) ldy = max(1,stride(Y,2)) queue = global_queue(context(Y), device(Y)) $fname(sycl_queue(queue), 'C', transA, transX, uplo, diag, alpha, 
A.handle, X, nrhs, ldx, Y, ldy) Y end end end function sparse_optimize_trsm!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSR) queue = global_queue(context(A.nzVal), device(A.nzVal)) onemklXsparse_optimize_trsm(sycl_queue(queue), uplo, trans, diag, A.handle) return A end function sparse_optimize_trsm!(uplo::Char, trans::Char, diag::Char, nrhs::Int, A::oneSparseMatrixCSR) queue = global_queue(context(A.nzVal), device(A.nzVal)) onemklXsparse_optimize_trsm_advanced(sycl_queue(queue), 'C', uplo, trans, diag, A.handle, nrhs) return A end # Only transA = 'N' is supported with oneSparseMatrixCSR. # We can't use any trick to support sparse "trsm" for oneSparseMatrixCSC. for (fname, elty) in ( (:onemklSsparse_trsm, :Float32), (:onemklDsparse_trsm, :Float64), (:onemklCsparse_trsm, :ComplexF32), (:onemklZsparse_trsm, :ComplexF64), ) @eval begin function sparse_trsm!( uplo::Char, transA::Char, transX::Char, diag::Char, alpha::Number, A::oneSparseMatrixCSC{$elty}, X::oneStridedMatrix{$elty}, Y::oneStridedMatrix{$elty} ) # Intel oneAPI sparse trsm only supports nontrans operations for the matrix A. # Since CSC(A) is stored as CSR(A^T), we cannot map CSC operations # to CSR operations for triangular solve operations without transpose support. throw( ArgumentError( "sparse_trsm! is not supported for oneSparseMatrixCSC due to Intel oneAPI limitations. " * "Intel sparse library only supports nontrans operations for triangular matrix operations. " * "Convert to oneSparseMatrixCSR format instead." 
) ) mX, nX = size(X) mY, nY = size(Y) (mX != mY) && (transX == 'N') && throw(ArgumentError("X and Y must have the same number of rows.")) (nX != nY) && (transX == 'N') && throw(ArgumentError("X and Y must have the same number of columns.")) (nX != mY) && (transX != 'N') && throw(ArgumentError("Xᵀ and Y must have the same number of rows.")) (mX != nY) && (transX != 'N') && throw(ArgumentError("Xᵀ and Y must have the same number of columns.")) nrhs = size(X, 2) ldx = max(1, stride(X, 2)) ldy = max(1, stride(Y, 2)) queue = global_queue(context(Y), device(Y)) $fname(sycl_queue(queue), 'C', flip_trans(transA), transX, uplo, diag, alpha, A.handle, X, nrhs, ldx, Y, ldy) return Y end end end function sparse_optimize_trsm!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSC) throw( ArgumentError( "sparse_optimize_trsm! is not supported for oneSparseMatrixCSC due to Intel oneAPI limitations. " * "Intel sparse library only supports nontrans operations for triangular matrix operations. " * "Convert to oneSparseMatrixCSR format instead." ) ) queue = global_queue(context(A.nzVal), device(A.nzVal)) onemklXsparse_optimize_trsm(sycl_queue(queue), uplo, trans, diag, A.handle) return A end function sparse_optimize_trsm!(uplo::Char, trans::Char, diag::Char, nrhs::Int, A::oneSparseMatrixCSC) throw( ArgumentError( "sparse_optimize_trsm! is not supported for oneSparseMatrixCSC due to Intel oneAPI limitations. " * "Intel sparse library only supports nontrans operations for triangular matrix operations. " * "Convert to oneSparseMatrixCSR format instead." 
) ) queue = global_queue(context(A.nzVal), device(A.nzVal)) onemklXsparse_optimize_trsm_advanced(sycl_queue(queue), 'C', uplo, trans, diag, A.handle, nrhs) return A end ================================================ FILE: lib/support/Support.jl ================================================ module Support using ..oneAPI using ..oneL0 using ..oneL0: ze_driver_handle_t, ze_device_handle_t, ze_context_handle_t, ze_command_queue_handle_t, ze_event_handle_t using oneAPI_Support_jll include("liboneapi_support.jl") # export everything for n in names(@__MODULE__; all=true) if Base.isidentifier(n) && n ∉ (Symbol(@__MODULE__), :eval, :include) @eval export $n end end function __init__() precompiling = ccall(:jl_generating_output, Cint, ()) != 0 precompiling && return if !oneAPI_Support_jll.is_available() @error """oneAPI support wrapper not available for your platform.""" return end end end ================================================ FILE: lib/support/liboneapi_support.jl ================================================ using CEnum: CEnum, @cenum mutable struct syclPlatform_st end const syclPlatform_t = Ptr{syclPlatform_st} function syclPlatformCreate(obj, driver) @ccall liboneapi_support.syclPlatformCreate(obj::Ptr{syclPlatform_t}, driver::ze_driver_handle_t)::Cint end function syclPlatformDestroy(obj) @ccall liboneapi_support.syclPlatformDestroy(obj::syclPlatform_t)::Cint end mutable struct syclDevice_st end const syclDevice_t = Ptr{syclDevice_st} function syclDeviceCreate(obj, platform, device) @ccall liboneapi_support.syclDeviceCreate(obj::Ptr{syclDevice_t}, platform::syclPlatform_t, device::ze_device_handle_t)::Cint end function syclDeviceDestroy(obj) @ccall liboneapi_support.syclDeviceDestroy(obj::syclDevice_t)::Cint end mutable struct syclContext_st end const syclContext_t = Ptr{syclContext_st} function syclContextCreate(obj, devices, ndevices, context, keep_ownership) @ccall liboneapi_support.syclContextCreate(obj::Ptr{syclContext_t}, 
devices::Ptr{syclDevice_t}, ndevices::Csize_t, context::ze_context_handle_t, keep_ownership::Cint)::Cint end function syclContextDestroy(obj) @ccall liboneapi_support.syclContextDestroy(obj::syclContext_t)::Cint end mutable struct syclQueue_st end const syclQueue_t = Ptr{syclQueue_st} function syclQueueCreate(obj, context, device, queue, keep_ownership) @ccall liboneapi_support.syclQueueCreate(obj::Ptr{syclQueue_t}, context::syclContext_t, device::syclDevice_t, queue::ze_command_queue_handle_t, keep_ownership::Cint)::Cint end function syclQueueDestroy(obj) @ccall liboneapi_support.syclQueueDestroy(obj::syclQueue_t)::Cint end function syclQueueWait(obj) @ccall liboneapi_support.syclQueueWait(obj::syclQueue_t)::Cint end mutable struct syclEvent_st end const syclEvent_t = Ptr{syclEvent_st} function syclEventCreate(obj, context, event, keep_ownership) @ccall liboneapi_support.syclEventCreate(obj::Ptr{syclEvent_t}, context::syclContext_t, event::ze_event_handle_t, keep_ownership::Cint)::Cint end function syclEventDestroy(obj) @ccall liboneapi_support.syclEventDestroy(obj::syclEvent_t)::Cint end @cenum onemklTranspose::UInt32 begin ONEMKL_TRANSPOSE_NONTRANS = 0 ONEMKL_TRANSPOSE_TRANS = 1 ONEMLK_TRANSPOSE_CONJTRANS = 2 end @cenum onemklUplo::UInt32 begin ONEMKL_UPLO_UPPER = 0 ONEMKL_UPLO_LOWER = 1 end @cenum onemklDiag::UInt32 begin ONEMKL_DIAG_NONUNIT = 0 ONEMKL_DIAG_UNIT = 1 end @cenum onemklSide::UInt32 begin ONEMKL_SIDE_LEFT = 0 ONEMKL_SIDE_RIGHT = 1 end @cenum onemklOffset::UInt32 begin ONEMKL_OFFSET_ROW = 0 ONEMKL_OFFSET_COL = 1 ONEMKL_OFFSET_FIX = 2 end @cenum onemklJob::UInt32 begin ONEMKL_JOB_N = 0 ONEMKL_JOB_V = 1 ONEMKL_JOB_U = 2 ONEMKL_JOB_A = 3 ONEMKL_JOB_S = 4 ONEMKL_JOB_O = 5 end @cenum onemklGenerate::UInt32 begin ONEMKL_GENERATE_Q = 0 ONEMKL_GENERATE_P = 1 ONEMKL_GENERATE_N = 2 ONEMKL_GENERATE_V = 3 end @cenum onemklCompz::UInt32 begin ONEMKL_COMPZ_N = 0 ONEMKL_COMPZ_V = 1 ONEMKL_COMPZ_I = 2 end @cenum onemklDirect::UInt32 begin ONEMKL_DIRECT_F = 0 
ONEMKL_DIRECT_B = 1 end @cenum onemklStorev::UInt32 begin ONEMKL_STOREV_C = 0 ONEMKL_STOREV_R = 1 end @cenum onemklRangev::UInt32 begin ONEMKL_RANGEV_A = 0 ONEMKL_RANGEV_V = 1 ONEMKL_RANGEV_I = 2 end @cenum onemklOrder::UInt32 begin ONEMKL_ORDER_B = 0 ONEMKL_ORDER_E = 1 end @cenum onemklJobsvd::UInt32 begin ONEMKL_JOBSVD_N = 0 ONEMKL_JOBSVD_A = 1 ONEMKL_JOBSVD_O = 2 ONEMKL_JOBSVD_S = 3 end @cenum onemklLayout::UInt32 begin ONEMKL_LAYOUT_ROW = 0 ONEMKL_LAYOUT_COL = 1 end @cenum onemklIndex::UInt32 begin ONEMKL_INDEX_ZERO = 0 ONEMKL_INDEX_ONE = 1 end @cenum onemklProperty::UInt32 begin ONEMKL_PROPERTY_SYMMETRIC = 0 ONEMKL_PROPERTY_SORTED = 1 end @cenum onemklMatrixView::UInt32 begin ONEMKL_MATRIX_VIEW_GENERAL = 0 end @cenum onemklMatmatRequest::UInt32 begin ONEMKL_MATMAT_REQUEST_GET_WORK_ESTIMATION_BUF_SIZE = 0 ONEMKL_MATMAT_REQUEST_WORK_ESTIMATION = 1 ONEMKL_MATMAT_REQUEST_GET_COMPUTE_STRUCTURE_BUF_SIZE = 2 ONEMKL_MATMAT_REQUEST_COMPUTE_STRUCTURE = 3 ONEMKL_MATMAT_REQUEST_FINALIZE_STRUCTURE = 4 ONEMKL_MATMAT_REQUEST_GET_COMPUTE_BUF_SIZE = 5 ONEMKL_MATMAT_REQUEST_COMPUTE = 6 ONEMKL_MATMAT_REQUEST_GET_NNZ = 7 ONEMKL_MATMAT_REQUEST_FINALIZE = 8 end @cenum onemklOmatconvertAlg::UInt32 begin ONEMKL_OMATCONVERT_DEFAULT_ALG = 0 end @cenum onemklOmataddAlg::UInt32 begin ONEMKL_OMATADD_DEFAULT_ALG = 0 end mutable struct matrix_handle end const matrix_handle_t = Ptr{matrix_handle} mutable struct matmat_descr end const matmat_descr_t = Ptr{matmat_descr} mutable struct omatconvert_descr end const omatconvert_descr_t = Ptr{omatconvert_descr} mutable struct omatadd_descr end const omatadd_descr_t = Ptr{omatadd_descr} function onemkl_version(major, minor, patch) @ccall liboneapi_support.onemkl_version(major::Ptr{Int64}, minor::Ptr{Int64}, patch::Ptr{Int64})::Cvoid end function onemklHgemm_batch(device_queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size) @ccall liboneapi_support.onemklHgemm_batch(device_queue::syclQueue_t, 
transa::onemklTranspose, transb::onemklTranspose, m::ZePtr{Int64}, n::ZePtr{Int64}, k::ZePtr{Int64}, alpha::ZePtr{Float16}, a::ZePtr{Ptr{Float16}}, lda::ZePtr{Int64}, b::ZePtr{Ptr{Float16}}, ldb::ZePtr{Int64}, beta::ZePtr{Float16}, c::ZePtr{Ptr{Float16}}, ldc::ZePtr{Int64}, group_count::Int64, group_size::ZePtr{Int64})::Cint end function onemklSgemm_batch(device_queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size) @ccall liboneapi_support.onemklSgemm_batch(device_queue::syclQueue_t, transa::onemklTranspose, transb::onemklTranspose, m::ZePtr{Int64}, n::ZePtr{Int64}, k::ZePtr{Int64}, alpha::ZePtr{Cfloat}, a::ZePtr{Ptr{Cfloat}}, lda::ZePtr{Int64}, b::ZePtr{Ptr{Cfloat}}, ldb::ZePtr{Int64}, beta::ZePtr{Cfloat}, c::ZePtr{Ptr{Cfloat}}, ldc::ZePtr{Int64}, group_count::Int64, group_size::ZePtr{Int64})::Cint end function onemklDgemm_batch(device_queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size) @ccall liboneapi_support.onemklDgemm_batch(device_queue::syclQueue_t, transa::onemklTranspose, transb::onemklTranspose, m::ZePtr{Int64}, n::ZePtr{Int64}, k::ZePtr{Int64}, alpha::ZePtr{Cdouble}, a::ZePtr{Ptr{Cdouble}}, lda::ZePtr{Int64}, b::ZePtr{Ptr{Cdouble}}, ldb::ZePtr{Int64}, beta::ZePtr{Cdouble}, c::ZePtr{Ptr{Cdouble}}, ldc::ZePtr{Int64}, group_count::Int64, group_size::ZePtr{Int64})::Cint end function onemklCgemm_batch(device_queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size) @ccall liboneapi_support.onemklCgemm_batch(device_queue::syclQueue_t, transa::onemklTranspose, transb::onemklTranspose, m::ZePtr{Int64}, n::ZePtr{Int64}, k::ZePtr{Int64}, alpha::ZePtr{ComplexF32}, a::ZePtr{Ptr{ComplexF32}}, lda::ZePtr{Int64}, b::ZePtr{Ptr{ComplexF32}}, ldb::ZePtr{Int64}, beta::ZePtr{ComplexF32}, c::ZePtr{Ptr{ComplexF32}}, ldc::ZePtr{Int64}, group_count::Int64, group_size::ZePtr{Int64})::Cint end function onemklZgemm_batch(device_queue, transa, transb, m, n, k, 
alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size) @ccall liboneapi_support.onemklZgemm_batch(device_queue::syclQueue_t, transa::onemklTranspose, transb::onemklTranspose, m::ZePtr{Int64}, n::ZePtr{Int64}, k::ZePtr{Int64}, alpha::ZePtr{ComplexF64}, a::ZePtr{Ptr{ComplexF64}}, lda::ZePtr{Int64}, b::ZePtr{Ptr{ComplexF64}}, ldb::ZePtr{Int64}, beta::ZePtr{ComplexF64}, c::ZePtr{Ptr{ComplexF64}}, ldc::ZePtr{Int64}, group_count::Int64, group_size::ZePtr{Int64})::Cint end function onemklStrsm_batch(device_queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size) @ccall liboneapi_support.onemklStrsm_batch(device_queue::syclQueue_t, left_right::onemklSide, upper_lower::onemklUplo, transa::onemklTranspose, unit_diag::onemklDiag, m::ZePtr{Int64}, n::ZePtr{Int64}, alpha::ZePtr{Cfloat}, a::ZePtr{Ptr{Cfloat}}, lda::ZePtr{Int64}, b::ZePtr{Ptr{Cfloat}}, ldb::ZePtr{Int64}, group_count::Int64, group_size::ZePtr{Int64})::Cint end function onemklDtrsm_batch(device_queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size) @ccall liboneapi_support.onemklDtrsm_batch(device_queue::syclQueue_t, left_right::onemklSide, upper_lower::onemklUplo, transa::onemklTranspose, unit_diag::onemklDiag, m::ZePtr{Int64}, n::ZePtr{Int64}, alpha::ZePtr{Cdouble}, a::ZePtr{Ptr{Cdouble}}, lda::ZePtr{Int64}, b::ZePtr{Ptr{Cdouble}}, ldb::ZePtr{Int64}, group_count::Int64, group_size::ZePtr{Int64})::Cint end function onemklCtrsm_batch(device_queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size) @ccall liboneapi_support.onemklCtrsm_batch(device_queue::syclQueue_t, left_right::onemklSide, upper_lower::onemklUplo, transa::onemklTranspose, unit_diag::onemklDiag, m::ZePtr{Int64}, n::ZePtr{Int64}, alpha::ZePtr{ComplexF32}, a::ZePtr{Ptr{ComplexF32}}, lda::ZePtr{Int64}, b::ZePtr{Ptr{ComplexF32}}, ldb::ZePtr{Int64}, group_count::Int64, group_size::ZePtr{Int64})::Cint end 
# Grouped-batch TRSM, ComplexF64.
function onemklZtrsm_batch(device_queue, left_right, upper_lower, transa,
                           unit_diag, m, n, alpha, a, lda, b, ldb,
                           group_count, group_size)
    @ccall liboneapi_support.onemklZtrsm_batch(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        transa::onemklTranspose, unit_diag::onemklDiag,
        m::ZePtr{Int64}, n::ZePtr{Int64}, alpha::ZePtr{ComplexF64},
        a::ZePtr{Ptr{ComplexF64}}, lda::ZePtr{Int64},
        b::ZePtr{Ptr{ComplexF64}}, ldb::ZePtr{Int64},
        group_count::Int64, group_size::ZePtr{Int64})::Cint
end

# GEMM, Float16; scalars go by host Ref, matrices by device ZePtr.
function onemklHgemm(device_queue, transa, transb, m, n, k, alpha, a, lda,
                     b, ldb, beta, c, ldc)
    @ccall liboneapi_support.onemklHgemm(device_queue::syclQueue_t,
        transa::onemklTranspose, transb::onemklTranspose,
        m::Int64, n::Int64, k::Int64, alpha::Ref{Float16},
        a::ZePtr{Float16}, lda::Int64, b::ZePtr{Float16}, ldb::Int64,
        beta::Ref{Float16}, c::ZePtr{Float16}, ldc::Int64)::Cint
end

# GEMM, Float32.
function onemklSgemm(device_queue, transa, transb, m, n, k, alpha, a, lda,
                     b, ldb, beta, c, ldc)
    @ccall liboneapi_support.onemklSgemm(device_queue::syclQueue_t,
        transa::onemklTranspose, transb::onemklTranspose,
        m::Int64, n::Int64, k::Int64, alpha::Ref{Cfloat},
        a::ZePtr{Cfloat}, lda::Int64, b::ZePtr{Cfloat}, ldb::Int64,
        beta::Ref{Cfloat}, c::ZePtr{Cfloat}, ldc::Int64)::Cint
end

# GEMM, Float64.
function onemklDgemm(device_queue, transa, transb, m, n, k, alpha, a, lda,
                     b, ldb, beta, c, ldc)
    @ccall liboneapi_support.onemklDgemm(device_queue::syclQueue_t,
        transa::onemklTranspose, transb::onemklTranspose,
        m::Int64, n::Int64, k::Int64, alpha::Ref{Cdouble},
        a::ZePtr{Cdouble}, lda::Int64, b::ZePtr{Cdouble}, ldb::Int64,
        beta::Ref{Cdouble}, c::ZePtr{Cdouble}, ldc::Int64)::Cint
end

# GEMM, ComplexF32 (ccall continues on the next source line).
function onemklCgemm(device_queue, transa, transb, m, n, k, alpha, a, lda,
                     b, ldb, beta, c, ldc)
    @ccall liboneapi_support.onemklCgemm(device_queue::syclQueue_t,
        transa::onemklTranspose, transb::onemklTranspose,
        m::Int64, n::Int64, k::Int64, alpha::Ref{ComplexF32},
        a::ZePtr{ComplexF32}, lda::Int64, b::ZePtr{ComplexF32}, ldb::Int64,
beta::Ref{ComplexF32}, c::ZePtr{ComplexF32}, ldc::Int64)::Cint
    # (tail of onemklCgemm's @ccall)
end

# GEMM, ComplexF64.
function onemklZgemm(device_queue, transa, transb, m, n, k, alpha, a, lda,
                     b, ldb, beta, c, ldc)
    @ccall liboneapi_support.onemklZgemm(device_queue::syclQueue_t,
        transa::onemklTranspose, transb::onemklTranspose,
        m::Int64, n::Int64, k::Int64, alpha::Ref{ComplexF64},
        a::ZePtr{ComplexF64}, lda::Int64, b::ZePtr{ComplexF64}, ldb::Int64,
        beta::Ref{ComplexF64}, c::ZePtr{ComplexF64}, ldc::Int64)::Cint
end

# SYMM, Float32.
function onemklSsymm(device_queue, left_right, upper_lower, m, n, alpha, a,
                     lda, b, ldb, beta, c, ldc)
    @ccall liboneapi_support.onemklSsymm(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        m::Int64, n::Int64, alpha::Ref{Cfloat},
        a::ZePtr{Cfloat}, lda::Int64, b::ZePtr{Cfloat}, ldb::Int64,
        beta::Ref{Cfloat}, c::ZePtr{Cfloat}, ldc::Int64)::Cint
end

# SYMM, Float64.
function onemklDsymm(device_queue, left_right, upper_lower, m, n, alpha, a,
                     lda, b, ldb, beta, c, ldc)
    @ccall liboneapi_support.onemklDsymm(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        m::Int64, n::Int64, alpha::Ref{Cdouble},
        a::ZePtr{Cdouble}, lda::Int64, b::ZePtr{Cdouble}, ldb::Int64,
        beta::Ref{Cdouble}, c::ZePtr{Cdouble}, ldc::Int64)::Cint
end

# SYMM, ComplexF32.
function onemklCsymm(device_queue, left_right, upper_lower, m, n, alpha, a,
                     lda, b, ldb, beta, c, ldc)
    @ccall liboneapi_support.onemklCsymm(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        m::Int64, n::Int64, alpha::Ref{ComplexF32},
        a::ZePtr{ComplexF32}, lda::Int64, b::ZePtr{ComplexF32}, ldb::Int64,
        beta::Ref{ComplexF32}, c::ZePtr{ComplexF32}, ldc::Int64)::Cint
end

# SYMM, ComplexF64 (ccall continues on the next source line).
function onemklZsymm(device_queue, left_right, upper_lower, m, n, alpha, a,
                     lda, b, ldb, beta, c, ldc)
    @ccall liboneapi_support.onemklZsymm(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        m::Int64, n::Int64, alpha::Ref{ComplexF64},
        a::ZePtr{ComplexF64}, lda::Int64, b::ZePtr{ComplexF64}, ldb::Int64,
        beta::Ref{ComplexF64}, c::ZePtr{ComplexF64},
ldc::Int64)::Cint
    # (tail of onemklZsymm's @ccall)
end

# HEMM, ComplexF32.
function onemklChemm(device_queue, left_right, upper_lower, m, n, alpha, a,
                     lda, b, ldb, beta, c, ldc)
    @ccall liboneapi_support.onemklChemm(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        m::Int64, n::Int64, alpha::Ref{ComplexF32},
        a::ZePtr{ComplexF32}, lda::Int64, b::ZePtr{ComplexF32}, ldb::Int64,
        beta::Ref{ComplexF32}, c::ZePtr{ComplexF32}, ldc::Int64)::Cint
end

# HEMM, ComplexF64.
function onemklZhemm(device_queue, left_right, upper_lower, m, n, alpha, a,
                     lda, b, ldb, beta, c, ldc)
    @ccall liboneapi_support.onemklZhemm(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        m::Int64, n::Int64, alpha::Ref{ComplexF64},
        a::ZePtr{ComplexF64}, lda::Int64, b::ZePtr{ComplexF64}, ldb::Int64,
        beta::Ref{ComplexF64}, c::ZePtr{ComplexF64}, ldc::Int64)::Cint
end

# SYRK, Float32.
function onemklSsyrk(device_queue, upper_lower, trans, n, k, alpha, a, lda,
                     beta, c, ldc)
    @ccall liboneapi_support.onemklSsyrk(device_queue::syclQueue_t,
        upper_lower::onemklUplo, trans::onemklTranspose,
        n::Int64, k::Int64, alpha::Ref{Cfloat},
        a::ZePtr{Cfloat}, lda::Int64,
        beta::Ref{Cfloat}, c::ZePtr{Cfloat}, ldc::Int64)::Cint
end

# SYRK, Float64.
function onemklDsyrk(device_queue, upper_lower, trans, n, k, alpha, a, lda,
                     beta, c, ldc)
    @ccall liboneapi_support.onemklDsyrk(device_queue::syclQueue_t,
        upper_lower::onemklUplo, trans::onemklTranspose,
        n::Int64, k::Int64, alpha::Ref{Cdouble},
        a::ZePtr{Cdouble}, lda::Int64,
        beta::Ref{Cdouble}, c::ZePtr{Cdouble}, ldc::Int64)::Cint
end

# SYRK, ComplexF32.
function onemklCsyrk(device_queue, upper_lower, trans, n, k, alpha, a, lda,
                     beta, c, ldc)
    @ccall liboneapi_support.onemklCsyrk(device_queue::syclQueue_t,
        upper_lower::onemklUplo, trans::onemklTranspose,
        n::Int64, k::Int64, alpha::Ref{ComplexF32},
        a::ZePtr{ComplexF32}, lda::Int64,
        beta::Ref{ComplexF32}, c::ZePtr{ComplexF32}, ldc::Int64)::Cint
end

# SYRK, ComplexF64 (ccall continues on the next source line).
function onemklZsyrk(device_queue, upper_lower, trans, n, k, alpha, a, lda,
                     beta, c, ldc)
    @ccall liboneapi_support.onemklZsyrk(device_queue::syclQueue_t,
upper_lower::onemklUplo, trans::onemklTranspose, n::Int64, k::Int64,
        alpha::Ref{ComplexF64}, a::ZePtr{ComplexF64}, lda::Int64,
        beta::Ref{ComplexF64}, c::ZePtr{ComplexF64}, ldc::Int64)::Cint
    # (tail of onemklZsyrk's @ccall)
end

# HERK, ComplexF32.
# NOTE(review): alpha/beta are passed as complex Refs even though HERK's
# scalars are mathematically real — matches the generated C shim; confirm
# against deps/src/onemkl.h before changing.
function onemklCherk(device_queue, upper_lower, trans, n, k, alpha, a, lda,
                     beta, c, ldc)
    @ccall liboneapi_support.onemklCherk(device_queue::syclQueue_t,
        upper_lower::onemklUplo, trans::onemklTranspose,
        n::Int64, k::Int64, alpha::Ref{ComplexF32},
        a::ZePtr{ComplexF32}, lda::Int64,
        beta::Ref{ComplexF32}, c::ZePtr{ComplexF32}, ldc::Int64)::Cint
end

# HERK, ComplexF64.
function onemklZherk(device_queue, upper_lower, trans, n, k, alpha, a, lda,
                     beta, c, ldc)
    @ccall liboneapi_support.onemklZherk(device_queue::syclQueue_t,
        upper_lower::onemklUplo, trans::onemklTranspose,
        n::Int64, k::Int64, alpha::Ref{ComplexF64},
        a::ZePtr{ComplexF64}, lda::Int64,
        beta::Ref{ComplexF64}, c::ZePtr{ComplexF64}, ldc::Int64)::Cint
end

# SYR2K, Float32.
function onemklSsyr2k(device_queue, upper_lower, trans, n, k, alpha, a, lda,
                      b, ldb, beta, c, ldc)
    @ccall liboneapi_support.onemklSsyr2k(device_queue::syclQueue_t,
        upper_lower::onemklUplo, trans::onemklTranspose,
        n::Int64, k::Int64, alpha::Ref{Cfloat},
        a::ZePtr{Cfloat}, lda::Int64, b::ZePtr{Cfloat}, ldb::Int64,
        beta::Ref{Cfloat}, c::ZePtr{Cfloat}, ldc::Int64)::Cint
end

# SYR2K, Float64.
function onemklDsyr2k(device_queue, upper_lower, trans, n, k, alpha, a, lda,
                      b, ldb, beta, c, ldc)
    @ccall liboneapi_support.onemklDsyr2k(device_queue::syclQueue_t,
        upper_lower::onemklUplo, trans::onemklTranspose,
        n::Int64, k::Int64, alpha::Ref{Cdouble},
        a::ZePtr{Cdouble}, lda::Int64, b::ZePtr{Cdouble}, ldb::Int64,
        beta::Ref{Cdouble}, c::ZePtr{Cdouble}, ldc::Int64)::Cint
end

# SYR2K, ComplexF32 (ccall continues on the next source line).
function onemklCsyr2k(device_queue, upper_lower, trans, n, k, alpha, a, lda,
                      b, ldb, beta, c, ldc)
    @ccall liboneapi_support.onemklCsyr2k(device_queue::syclQueue_t,
        upper_lower::onemklUplo, trans::onemklTranspose,
        n::Int64, k::Int64, alpha::Ref{ComplexF32},
        a::ZePtr{ComplexF32}, lda::Int64, b::ZePtr{ComplexF32}, ldb::Int64,
        beta::Ref{ComplexF32},
c::ZePtr{ComplexF32}, ldc::Int64)::Cint
    # (tail of onemklCsyr2k's @ccall)
end

# SYR2K, ComplexF64.
function onemklZsyr2k(device_queue, upper_lower, trans, n, k, alpha, a, lda,
                      b, ldb, beta, c, ldc)
    @ccall liboneapi_support.onemklZsyr2k(device_queue::syclQueue_t,
        upper_lower::onemklUplo, trans::onemklTranspose,
        n::Int64, k::Int64, alpha::Ref{ComplexF64},
        a::ZePtr{ComplexF64}, lda::Int64, b::ZePtr{ComplexF64}, ldb::Int64,
        beta::Ref{ComplexF64}, c::ZePtr{ComplexF64}, ldc::Int64)::Cint
end

# HER2K, ComplexF32.
function onemklCher2k(device_queue, upper_lower, trans, n, k, alpha, a, lda,
                      b, ldb, beta, c, ldc)
    @ccall liboneapi_support.onemklCher2k(device_queue::syclQueue_t,
        upper_lower::onemklUplo, trans::onemklTranspose,
        n::Int64, k::Int64, alpha::Ref{ComplexF32},
        a::ZePtr{ComplexF32}, lda::Int64, b::ZePtr{ComplexF32}, ldb::Int64,
        beta::Ref{ComplexF32}, c::ZePtr{ComplexF32}, ldc::Int64)::Cint
end

# HER2K, ComplexF64.
function onemklZher2k(device_queue, upper_lower, trans, n, k, alpha, a, lda,
                      b, ldb, beta, c, ldc)
    @ccall liboneapi_support.onemklZher2k(device_queue::syclQueue_t,
        upper_lower::onemklUplo, trans::onemklTranspose,
        n::Int64, k::Int64, alpha::Ref{ComplexF64},
        a::ZePtr{ComplexF64}, lda::Int64, b::ZePtr{ComplexF64}, ldb::Int64,
        beta::Ref{ComplexF64}, c::ZePtr{ComplexF64}, ldc::Int64)::Cint
end

# TRMM, Float32 (in-place on b).
function onemklStrmm(device_queue, left_right, upper_lower, trans, unit_diag,
                     m, n, alpha, a, lda, b, ldb)
    @ccall liboneapi_support.onemklStrmm(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        trans::onemklTranspose, unit_diag::onemklDiag,
        m::Int64, n::Int64, alpha::Ref{Cfloat},
        a::ZePtr{Cfloat}, lda::Int64, b::ZePtr{Cfloat}, ldb::Int64)::Cint
end

# TRMM, Float64.
function onemklDtrmm(device_queue, left_right, upper_lower, trans, unit_diag,
                     m, n, alpha, a, lda, b, ldb)
    @ccall liboneapi_support.onemklDtrmm(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        trans::onemklTranspose, unit_diag::onemklDiag,
        m::Int64, n::Int64, alpha::Ref{Cdouble},
        a::ZePtr{Cdouble}, lda::Int64, b::ZePtr{Cdouble}, ldb::Int64)::Cint
end

# TRMM, ComplexF32 (definition continues on the next source line).
function
onemklCtrmm(device_queue, left_right, upper_lower, trans, unit_diag, m, n,
            alpha, a, lda, b, ldb)
    @ccall liboneapi_support.onemklCtrmm(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        trans::onemklTranspose, unit_diag::onemklDiag,
        m::Int64, n::Int64, alpha::Ref{ComplexF32},
        a::ZePtr{ComplexF32}, lda::Int64, b::ZePtr{ComplexF32}, ldb::Int64)::Cint
end

# TRMM, ComplexF64.
function onemklZtrmm(device_queue, left_right, upper_lower, trans, unit_diag,
                     m, n, alpha, a, lda, b, ldb)
    @ccall liboneapi_support.onemklZtrmm(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        trans::onemklTranspose, unit_diag::onemklDiag,
        m::Int64, n::Int64, alpha::Ref{ComplexF64},
        a::ZePtr{ComplexF64}, lda::Int64, b::ZePtr{ComplexF64}, ldb::Int64)::Cint
end

# TRMM "variant" entry point (separate b and c buffers), Float32.
function onemklStrmm_variant(device_queue, left_right, upper_lower, trans,
                             unit_diag, m, n, alpha, a, lda, b, ldb, beta,
                             c, ldc)
    @ccall liboneapi_support.onemklStrmm_variant(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        trans::onemklTranspose, unit_diag::onemklDiag,
        m::Int64, n::Int64, alpha::Ref{Cfloat},
        a::ZePtr{Cfloat}, lda::Int64, b::ZePtr{Cfloat}, ldb::Int64,
        beta::Ref{Cfloat}, c::ZePtr{Cfloat}, ldc::Int64)::Cint
end

# TRMM variant, Float64.
function onemklDtrmm_variant(device_queue, left_right, upper_lower, trans,
                             unit_diag, m, n, alpha, a, lda, b, ldb, beta,
                             c, ldc)
    @ccall liboneapi_support.onemklDtrmm_variant(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        trans::onemklTranspose, unit_diag::onemklDiag,
        m::Int64, n::Int64, alpha::Ref{Cdouble},
        a::ZePtr{Cdouble}, lda::Int64, b::ZePtr{Cdouble}, ldb::Int64,
        beta::Ref{Cdouble}, c::ZePtr{Cdouble}, ldc::Int64)::Cint
end

# TRMM variant, ComplexF32 (ccall continues on the next source line).
function onemklCtrmm_variant(device_queue, left_right, upper_lower, trans,
                             unit_diag, m, n, alpha, a, lda, b, ldb, beta,
                             c, ldc)
    @ccall liboneapi_support.onemklCtrmm_variant(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        trans::onemklTranspose, unit_diag::onemklDiag, m::Int64,
n::Int64, alpha::Ref{ComplexF32}, a::ZePtr{ComplexF32}, lda::Int64,
        b::ZePtr{ComplexF32}, ldb::Int64, beta::Ref{ComplexF32},
        c::ZePtr{ComplexF32}, ldc::Int64)::Cint
    # (tail of onemklCtrmm_variant's @ccall)
end

# TRMM variant, ComplexF64.
function onemklZtrmm_variant(device_queue, left_right, upper_lower, trans,
                             unit_diag, m, n, alpha, a, lda, b, ldb, beta,
                             c, ldc)
    @ccall liboneapi_support.onemklZtrmm_variant(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        trans::onemklTranspose, unit_diag::onemklDiag,
        m::Int64, n::Int64, alpha::Ref{ComplexF64},
        a::ZePtr{ComplexF64}, lda::Int64, b::ZePtr{ComplexF64}, ldb::Int64,
        beta::Ref{ComplexF64}, c::ZePtr{ComplexF64}, ldc::Int64)::Cint
end

# TRSM, Float32 (in-place on b).
function onemklStrsm(device_queue, left_right, upper_lower, trans, unit_diag,
                     m, n, alpha, a, lda, b, ldb)
    @ccall liboneapi_support.onemklStrsm(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        trans::onemklTranspose, unit_diag::onemklDiag,
        m::Int64, n::Int64, alpha::Ref{Cfloat},
        a::ZePtr{Cfloat}, lda::Int64, b::ZePtr{Cfloat}, ldb::Int64)::Cint
end

# TRSM, Float64.
function onemklDtrsm(device_queue, left_right, upper_lower, trans, unit_diag,
                     m, n, alpha, a, lda, b, ldb)
    @ccall liboneapi_support.onemklDtrsm(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        trans::onemklTranspose, unit_diag::onemklDiag,
        m::Int64, n::Int64, alpha::Ref{Cdouble},
        a::ZePtr{Cdouble}, lda::Int64, b::ZePtr{Cdouble}, ldb::Int64)::Cint
end

# TRSM, ComplexF32.
function onemklCtrsm(device_queue, left_right, upper_lower, trans, unit_diag,
                     m, n, alpha, a, lda, b, ldb)
    @ccall liboneapi_support.onemklCtrsm(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        trans::onemklTranspose, unit_diag::onemklDiag,
        m::Int64, n::Int64, alpha::Ref{ComplexF32},
        a::ZePtr{ComplexF32}, lda::Int64, b::ZePtr{ComplexF32}, ldb::Int64)::Cint
end

# TRSM, ComplexF64 (ccall continues on the next source line).
function onemklZtrsm(device_queue, left_right, upper_lower, trans, unit_diag,
                     m, n, alpha, a, lda, b, ldb)
    @ccall liboneapi_support.onemklZtrsm(device_queue::syclQueue_t,
        left_right::onemklSide,
upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag,
        m::Int64, n::Int64, alpha::Ref{ComplexF64},
        a::ZePtr{ComplexF64}, lda::Int64, b::ZePtr{ComplexF64}, ldb::Int64)::Cint
    # (tail of onemklZtrsm's @ccall)
end

# TRSM "variant" entry point (separate b and c buffers), Float32.
function onemklStrsm_variant(device_queue, left_right, upper_lower, trans,
                             unit_diag, m, n, alpha, a, lda, b, ldb, beta,
                             c, ldc)
    @ccall liboneapi_support.onemklStrsm_variant(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        trans::onemklTranspose, unit_diag::onemklDiag,
        m::Int64, n::Int64, alpha::Ref{Cfloat},
        a::ZePtr{Cfloat}, lda::Int64, b::ZePtr{Cfloat}, ldb::Int64,
        beta::Ref{Cfloat}, c::ZePtr{Cfloat}, ldc::Int64)::Cint
end

# TRSM variant, Float64.
function onemklDtrsm_variant(device_queue, left_right, upper_lower, trans,
                             unit_diag, m, n, alpha, a, lda, b, ldb, beta,
                             c, ldc)
    @ccall liboneapi_support.onemklDtrsm_variant(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        trans::onemklTranspose, unit_diag::onemklDiag,
        m::Int64, n::Int64, alpha::Ref{Cdouble},
        a::ZePtr{Cdouble}, lda::Int64, b::ZePtr{Cdouble}, ldb::Int64,
        beta::Ref{Cdouble}, c::ZePtr{Cdouble}, ldc::Int64)::Cint
end

# TRSM variant, ComplexF32.
function onemklCtrsm_variant(device_queue, left_right, upper_lower, trans,
                             unit_diag, m, n, alpha, a, lda, b, ldb, beta,
                             c, ldc)
    @ccall liboneapi_support.onemklCtrsm_variant(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        trans::onemklTranspose, unit_diag::onemklDiag,
        m::Int64, n::Int64, alpha::Ref{ComplexF32},
        a::ZePtr{ComplexF32}, lda::Int64, b::ZePtr{ComplexF32}, ldb::Int64,
        beta::Ref{ComplexF32}, c::ZePtr{ComplexF32}, ldc::Int64)::Cint
end

# TRSM variant, ComplexF64 (ccall continues on the next source line).
function onemklZtrsm_variant(device_queue, left_right, upper_lower, trans,
                             unit_diag, m, n, alpha, a, lda, b, ldb, beta,
                             c, ldc)
    @ccall liboneapi_support.onemklZtrsm_variant(device_queue::syclQueue_t,
        left_right::onemklSide, upper_lower::onemklUplo,
        trans::onemklTranspose, unit_diag::onemklDiag,
        m::Int64, n::Int64, alpha::Ref{ComplexF64},
        a::ZePtr{ComplexF64}, lda::Int64, b::ZePtr{ComplexF64}, ldb::Int64,
beta::Ref{ComplexF64}, c::ZePtr{ComplexF64}, ldc::Int64)::Cint
    # (tail of onemklZtrsm_variant's @ccall)
end

# DGMM (diagonal-matrix scaling), Float32.
function onemklSdgmm(device_queue, left_right, m, n, a, lda, x, incx, c, ldc)
    @ccall liboneapi_support.onemklSdgmm(device_queue::syclQueue_t,
        left_right::onemklSide, m::Int64, n::Int64,
        a::ZePtr{Cfloat}, lda::Int64, x::ZePtr{Cfloat}, incx::Int64,
        c::ZePtr{Cfloat}, ldc::Int64)::Cint
end

# DGMM, Float64.
function onemklDdgmm(device_queue, left_right, m, n, a, lda, x, incx, c, ldc)
    @ccall liboneapi_support.onemklDdgmm(device_queue::syclQueue_t,
        left_right::onemklSide, m::Int64, n::Int64,
        a::ZePtr{Cdouble}, lda::Int64, x::ZePtr{Cdouble}, incx::Int64,
        c::ZePtr{Cdouble}, ldc::Int64)::Cint
end

# DGMM, ComplexF32.
function onemklCdgmm(device_queue, left_right, m, n, a, lda, x, incx, c, ldc)
    @ccall liboneapi_support.onemklCdgmm(device_queue::syclQueue_t,
        left_right::onemklSide, m::Int64, n::Int64,
        a::ZePtr{ComplexF32}, lda::Int64, x::ZePtr{ComplexF32}, incx::Int64,
        c::ZePtr{ComplexF32}, ldc::Int64)::Cint
end

# DGMM, ComplexF64.
function onemklZdgmm(device_queue, left_right, m, n, a, lda, x, incx, c, ldc)
    @ccall liboneapi_support.onemklZdgmm(device_queue::syclQueue_t,
        left_right::onemklSide, m::Int64, n::Int64,
        a::ZePtr{ComplexF64}, lda::Int64, x::ZePtr{ComplexF64}, incx::Int64,
        c::ZePtr{ComplexF64}, ldc::Int64)::Cint
end

# GEMV, Float32.
function onemklSgemv(device_queue, trans, m, n, alpha, a, lda, x, incx,
                     beta, y, incy)
    @ccall liboneapi_support.onemklSgemv(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, alpha::Ref{Cfloat},
        a::ZePtr{Cfloat}, lda::Int64, x::ZePtr{Cfloat}, incx::Int64,
        beta::Ref{Cfloat}, y::ZePtr{Cfloat}, incy::Int64)::Cint
end

# GEMV, Float64.
function onemklDgemv(device_queue, trans, m, n, alpha, a, lda, x, incx,
                     beta, y, incy)
    @ccall liboneapi_support.onemklDgemv(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, alpha::Ref{Cdouble},
        a::ZePtr{Cdouble}, lda::Int64, x::ZePtr{Cdouble}, incx::Int64,
        beta::Ref{Cdouble}, y::ZePtr{Cdouble}, incy::Int64)::Cint
end

# GEMV, ComplexF32 (definition continues on the next source line).
function onemklCgemv(device_queue, trans, m, n, alpha, a, lda, x, incx, beta, y,
incy)
    # (body of onemklCgemv, whose signature starts on the previous line)
    @ccall liboneapi_support.onemklCgemv(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, alpha::Ref{ComplexF32},
        a::ZePtr{ComplexF32}, lda::Int64, x::ZePtr{ComplexF32}, incx::Int64,
        beta::Ref{ComplexF32}, y::ZePtr{ComplexF32}, incy::Int64)::Cint
end

# GEMV, ComplexF64.
function onemklZgemv(device_queue, trans, m, n, alpha, a, lda, x, incx,
                     beta, y, incy)
    @ccall liboneapi_support.onemklZgemv(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, alpha::Ref{ComplexF64},
        a::ZePtr{ComplexF64}, lda::Int64, x::ZePtr{ComplexF64}, incx::Int64,
        beta::Ref{ComplexF64}, y::ZePtr{ComplexF64}, incy::Int64)::Cint
end

# GBMV (banded, kl sub-/ku super-diagonals), Float32.
function onemklSgbmv(device_queue, trans, m, n, kl, ku, alpha, a, lda, x,
                     incx, beta, y, incy)
    @ccall liboneapi_support.onemklSgbmv(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, kl::Int64, ku::Int64,
        alpha::Ref{Cfloat}, a::ZePtr{Cfloat}, lda::Int64,
        x::ZePtr{Cfloat}, incx::Int64,
        beta::Ref{Cfloat}, y::ZePtr{Cfloat}, incy::Int64)::Cint
end

# GBMV, Float64.
function onemklDgbmv(device_queue, trans, m, n, kl, ku, alpha, a, lda, x,
                     incx, beta, y, incy)
    @ccall liboneapi_support.onemklDgbmv(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, kl::Int64, ku::Int64,
        alpha::Ref{Cdouble}, a::ZePtr{Cdouble}, lda::Int64,
        x::ZePtr{Cdouble}, incx::Int64,
        beta::Ref{Cdouble}, y::ZePtr{Cdouble}, incy::Int64)::Cint
end

# GBMV, ComplexF32.
function onemklCgbmv(device_queue, trans, m, n, kl, ku, alpha, a, lda, x,
                     incx, beta, y, incy)
    @ccall liboneapi_support.onemklCgbmv(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, kl::Int64, ku::Int64,
        alpha::Ref{ComplexF32}, a::ZePtr{ComplexF32}, lda::Int64,
        x::ZePtr{ComplexF32}, incx::Int64,
        beta::Ref{ComplexF32}, y::ZePtr{ComplexF32}, incy::Int64)::Cint
end

# GBMV, ComplexF64 (ccall continues on the next source line).
function onemklZgbmv(device_queue, trans, m, n, kl, ku, alpha, a, lda, x,
                     incx, beta, y, incy)
    @ccall liboneapi_support.onemklZgbmv(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, kl::Int64, ku::Int64,
alpha::Ref{ComplexF64}, a::ZePtr{ComplexF64}, lda::Int64,
        x::ZePtr{ComplexF64}, incx::Int64,
        beta::Ref{ComplexF64}, y::ZePtr{ComplexF64}, incy::Int64)::Cint
    # (tail of onemklZgbmv's @ccall)
end

# GER (rank-1 update), Float32.
function onemklSger(device_queue, m, n, alpha, x, incx, y, incy, a, lda)
    @ccall liboneapi_support.onemklSger(device_queue::syclQueue_t,
        m::Int64, n::Int64, alpha::Ref{Cfloat},
        x::ZePtr{Cfloat}, incx::Int64, y::ZePtr{Cfloat}, incy::Int64,
        a::ZePtr{Cfloat}, lda::Int64)::Cint
end

# GER, Float64.
function onemklDger(device_queue, m, n, alpha, x, incx, y, incy, a, lda)
    @ccall liboneapi_support.onemklDger(device_queue::syclQueue_t,
        m::Int64, n::Int64, alpha::Ref{Cdouble},
        x::ZePtr{Cdouble}, incx::Int64, y::ZePtr{Cdouble}, incy::Int64,
        a::ZePtr{Cdouble}, lda::Int64)::Cint
end

# GERC (conjugated rank-1 update), ComplexF32.
function onemklCgerc(device_queue, m, n, alpha, x, incx, y, incy, a, lda)
    @ccall liboneapi_support.onemklCgerc(device_queue::syclQueue_t,
        m::Int64, n::Int64, alpha::Ref{ComplexF32},
        x::ZePtr{ComplexF32}, incx::Int64, y::ZePtr{ComplexF32}, incy::Int64,
        a::ZePtr{ComplexF32}, lda::Int64)::Cint
end

# GERC, ComplexF64.
function onemklZgerc(device_queue, m, n, alpha, x, incx, y, incy, a, lda)
    @ccall liboneapi_support.onemklZgerc(device_queue::syclQueue_t,
        m::Int64, n::Int64, alpha::Ref{ComplexF64},
        x::ZePtr{ComplexF64}, incx::Int64, y::ZePtr{ComplexF64}, incy::Int64,
        a::ZePtr{ComplexF64}, lda::Int64)::Cint
end

# GERU (unconjugated rank-1 update), ComplexF32.
function onemklCgeru(device_queue, m, n, alpha, x, incx, y, incy, a, lda)
    @ccall liboneapi_support.onemklCgeru(device_queue::syclQueue_t,
        m::Int64, n::Int64, alpha::Ref{ComplexF32},
        x::ZePtr{ComplexF32}, incx::Int64, y::ZePtr{ComplexF32}, incy::Int64,
        a::ZePtr{ComplexF32}, lda::Int64)::Cint
end

# GERU, ComplexF64.
function onemklZgeru(device_queue, m, n, alpha, x, incx, y, incy, a, lda)
    @ccall liboneapi_support.onemklZgeru(device_queue::syclQueue_t,
        m::Int64, n::Int64, alpha::Ref{ComplexF64},
        x::ZePtr{ComplexF64}, incx::Int64, y::ZePtr{ComplexF64}, incy::Int64,
        a::ZePtr{ComplexF64}, lda::Int64)::Cint
end

# HBMV (Hermitian banded), ComplexF32 (ccall is on the next source line).
function onemklChbmv(device_queue, upper_lower, n, k, alpha, a, lda, x, incx,
                     beta, y, incy)
    # (body of onemklChbmv, whose signature is on the previous line)
    @ccall liboneapi_support.onemklChbmv(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, k::Int64, alpha::Ref{ComplexF32},
        a::ZePtr{ComplexF32}, lda::Int64, x::ZePtr{ComplexF32}, incx::Int64,
        beta::Ref{ComplexF32}, y::ZePtr{ComplexF32}, incy::Int64)::Cint
end

# HBMV, ComplexF64.
function onemklZhbmv(device_queue, upper_lower, n, k, alpha, a, lda, x, incx,
                     beta, y, incy)
    @ccall liboneapi_support.onemklZhbmv(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, k::Int64, alpha::Ref{ComplexF64},
        a::ZePtr{ComplexF64}, lda::Int64, x::ZePtr{ComplexF64}, incx::Int64,
        beta::Ref{ComplexF64}, y::ZePtr{ComplexF64}, incy::Int64)::Cint
end

# HEMV, ComplexF32.
function onemklChemv(device_queue, upper_lower, n, alpha, a, lda, x, incx,
                     beta, y, incy)
    @ccall liboneapi_support.onemklChemv(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{ComplexF32},
        a::ZePtr{ComplexF32}, lda::Int64, x::ZePtr{ComplexF32}, incx::Int64,
        beta::Ref{ComplexF32}, y::ZePtr{ComplexF32}, incy::Int64)::Cint
end

# HEMV, ComplexF64.
function onemklZhemv(device_queue, upper_lower, n, alpha, a, lda, x, incx,
                     beta, y, incy)
    @ccall liboneapi_support.onemklZhemv(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{ComplexF64},
        a::ZePtr{ComplexF64}, lda::Int64, x::ZePtr{ComplexF64}, incx::Int64,
        beta::Ref{ComplexF64}, y::ZePtr{ComplexF64}, incy::Int64)::Cint
end

# HER (Hermitian rank-1 update), ComplexF32.
# NOTE(review): alpha is a complex Ref although HER's alpha is mathematically
# real — matches the generated C shim; confirm against deps/src/onemkl.h.
function onemklCher(device_queue, upper_lower, n, alpha, x, incx, a, lda)
    @ccall liboneapi_support.onemklCher(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{ComplexF32},
        x::ZePtr{ComplexF32}, incx::Int64, a::ZePtr{ComplexF32}, lda::Int64)::Cint
end

# HER, ComplexF64.
function onemklZher(device_queue, upper_lower, n, alpha, x, incx, a, lda)
    @ccall liboneapi_support.onemklZher(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{ComplexF64},
        x::ZePtr{ComplexF64}, incx::Int64, a::ZePtr{ComplexF64}, lda::Int64)::Cint
end

# HER2, ComplexF32 (ccall continues on the next source line).
function onemklCher2(device_queue, upper_lower, n, alpha, x, incx, y, incy,
                     a, lda)
    @ccall
liboneapi_support.onemklCher2(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{ComplexF32},
        x::ZePtr{ComplexF32}, incx::Int64, y::ZePtr{ComplexF32}, incy::Int64,
        a::ZePtr{ComplexF32}, lda::Int64)::Cint
    # (tail of onemklCher2's @ccall)
end

# HER2, ComplexF64.
function onemklZher2(device_queue, upper_lower, n, alpha, x, incx, y, incy,
                     a, lda)
    @ccall liboneapi_support.onemklZher2(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{ComplexF64},
        x::ZePtr{ComplexF64}, incx::Int64, y::ZePtr{ComplexF64}, incy::Int64,
        a::ZePtr{ComplexF64}, lda::Int64)::Cint
end

# HPMV (packed Hermitian, no lda), ComplexF32.
function onemklChpmv(device_queue, upper_lower, n, alpha, a, x, incx, beta,
                     y, incy)
    @ccall liboneapi_support.onemklChpmv(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{ComplexF32},
        a::ZePtr{ComplexF32}, x::ZePtr{ComplexF32}, incx::Int64,
        beta::Ref{ComplexF32}, y::ZePtr{ComplexF32}, incy::Int64)::Cint
end

# HPMV, ComplexF64.
function onemklZhpmv(device_queue, upper_lower, n, alpha, a, x, incx, beta,
                     y, incy)
    @ccall liboneapi_support.onemklZhpmv(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{ComplexF64},
        a::ZePtr{ComplexF64}, x::ZePtr{ComplexF64}, incx::Int64,
        beta::Ref{ComplexF64}, y::ZePtr{ComplexF64}, incy::Int64)::Cint
end

# HPR (packed Hermitian rank-1 update), ComplexF32; alpha is a real Float32.
function onemklChpr(device_queue, upper_lower, n, alpha, x, incx, a)
    @ccall liboneapi_support.onemklChpr(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{Float32},
        x::ZePtr{ComplexF32}, incx::Int64, a::ZePtr{ComplexF32})::Cint
end

# HPR, ComplexF64; alpha is a real Float64.
function onemklZhpr(device_queue, upper_lower, n, alpha, x, incx, a)
    @ccall liboneapi_support.onemklZhpr(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{Float64},
        x::ZePtr{ComplexF64}, incx::Int64, a::ZePtr{ComplexF64})::Cint
end

# HPR2, ComplexF32 (ccall continues on the next source line).
function onemklChpr2(device_queue, upper_lower, n, alpha, x, incx, y, incy, a)
    @ccall liboneapi_support.onemklChpr2(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{ComplexF32},
        x::ZePtr{ComplexF32}, incx::Int64, y::ZePtr{ComplexF32}, incy::Int64,
a::ZePtr{ComplexF32})::Cint
    # (tail of onemklChpr2's @ccall)
end

# HPR2, ComplexF64.
function onemklZhpr2(device_queue, upper_lower, n, alpha, x, incx, y, incy, a)
    @ccall liboneapi_support.onemklZhpr2(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{ComplexF64},
        x::ZePtr{ComplexF64}, incx::Int64, y::ZePtr{ComplexF64}, incy::Int64,
        a::ZePtr{ComplexF64})::Cint
end

# SBMV (symmetric banded), Float32.
function onemklSsbmv(device_queue, upper_lower, n, k, alpha, a, lda, x, incx,
                     beta, y, incy)
    @ccall liboneapi_support.onemklSsbmv(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, k::Int64, alpha::Ref{Cfloat},
        a::ZePtr{Cfloat}, lda::Int64, x::ZePtr{Cfloat}, incx::Int64,
        beta::Ref{Cfloat}, y::ZePtr{Cfloat}, incy::Int64)::Cint
end

# SBMV, Float64.
function onemklDsbmv(device_queue, upper_lower, n, k, alpha, a, lda, x, incx,
                     beta, y, incy)
    @ccall liboneapi_support.onemklDsbmv(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, k::Int64, alpha::Ref{Cdouble},
        a::ZePtr{Cdouble}, lda::Int64, x::ZePtr{Cdouble}, incx::Int64,
        beta::Ref{Cdouble}, y::ZePtr{Cdouble}, incy::Int64)::Cint
end

# SYMV, Float32.
function onemklSsymv(device_queue, upper_lower, n, alpha, a, lda, x, incx,
                     beta, y, incy)
    @ccall liboneapi_support.onemklSsymv(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{Cfloat},
        a::ZePtr{Cfloat}, lda::Int64, x::ZePtr{Cfloat}, incx::Int64,
        beta::Ref{Cfloat}, y::ZePtr{Cfloat}, incy::Int64)::Cint
end

# SYMV, Float64.
function onemklDsymv(device_queue, upper_lower, n, alpha, a, lda, x, incx,
                     beta, y, incy)
    @ccall liboneapi_support.onemklDsymv(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{Cdouble},
        a::ZePtr{Cdouble}, lda::Int64, x::ZePtr{Cdouble}, incx::Int64,
        beta::Ref{Cdouble}, y::ZePtr{Cdouble}, incy::Int64)::Cint
end

# SYMV, ComplexF32 (ccall continues on the next source line).
function onemklCsymv(device_queue, upper_lower, n, alpha, a, lda, x, incx,
                     beta, y, incy)
    @ccall liboneapi_support.onemklCsymv(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{ComplexF32},
        a::ZePtr{ComplexF32}, lda::Int64, x::ZePtr{ComplexF32}, incx::Int64,
        beta::Ref{ComplexF32},
y::ZePtr{ComplexF32}, incy::Int64)::Cint
    # (tail of onemklCsymv's @ccall)
end

# SYMV, ComplexF64.
function onemklZsymv(device_queue, upper_lower, n, alpha, a, lda, x, incx,
                     beta, y, incy)
    @ccall liboneapi_support.onemklZsymv(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{ComplexF64},
        a::ZePtr{ComplexF64}, lda::Int64, x::ZePtr{ComplexF64}, incx::Int64,
        beta::Ref{ComplexF64}, y::ZePtr{ComplexF64}, incy::Int64)::Cint
end

# SYR (symmetric rank-1 update), Float32.
function onemklSsyr(device_queue, upper_lower, n, alpha, x, incx, a, lda)
    @ccall liboneapi_support.onemklSsyr(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{Cfloat},
        x::ZePtr{Cfloat}, incx::Int64, a::ZePtr{Cfloat}, lda::Int64)::Cint
end

# SYR, Float64.
function onemklDsyr(device_queue, upper_lower, n, alpha, x, incx, a, lda)
    @ccall liboneapi_support.onemklDsyr(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{Cdouble},
        x::ZePtr{Cdouble}, incx::Int64, a::ZePtr{Cdouble}, lda::Int64)::Cint
end

# SYR, ComplexF32.
function onemklCsyr(device_queue, upper_lower, n, alpha, x, incx, a, lda)
    @ccall liboneapi_support.onemklCsyr(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{ComplexF32},
        x::ZePtr{ComplexF32}, incx::Int64, a::ZePtr{ComplexF32}, lda::Int64)::Cint
end

# SYR, ComplexF64.
function onemklZsyr(device_queue, upper_lower, n, alpha, x, incx, a, lda)
    @ccall liboneapi_support.onemklZsyr(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{ComplexF64},
        x::ZePtr{ComplexF64}, incx::Int64, a::ZePtr{ComplexF64}, lda::Int64)::Cint
end

# SYR2, Float32.
function onemklSsyr2(device_queue, upper_lower, n, alpha, x, incx, y, incy,
                     a, lda)
    @ccall liboneapi_support.onemklSsyr2(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{Cfloat},
        x::ZePtr{Cfloat}, incx::Int64, y::ZePtr{Cfloat}, incy::Int64,
        a::ZePtr{Cfloat}, lda::Int64)::Cint
end

# SYR2, Float64 (ccall continues on the next source line).
function onemklDsyr2(device_queue, upper_lower, n, alpha, x, incx, y, incy,
                     a, lda)
    @ccall liboneapi_support.onemklDsyr2(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{Cdouble},
x::ZePtr{Cdouble}, incx::Int64, y::ZePtr{Cdouble}, incy::Int64,
        a::ZePtr{Cdouble}, lda::Int64)::Cint
    # (tail of onemklDsyr2's @ccall)
end

# SYR2, ComplexF32.
function onemklCsyr2(device_queue, upper_lower, n, alpha, x, incx, y, incy,
                     a, lda)
    @ccall liboneapi_support.onemklCsyr2(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{ComplexF32},
        x::ZePtr{ComplexF32}, incx::Int64, y::ZePtr{ComplexF32}, incy::Int64,
        a::ZePtr{ComplexF32}, lda::Int64)::Cint
end

# SYR2, ComplexF64.
function onemklZsyr2(device_queue, upper_lower, n, alpha, x, incx, y, incy,
                     a, lda)
    @ccall liboneapi_support.onemklZsyr2(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{ComplexF64},
        x::ZePtr{ComplexF64}, incx::Int64, y::ZePtr{ComplexF64}, incy::Int64,
        a::ZePtr{ComplexF64}, lda::Int64)::Cint
end

# SPMV (packed symmetric, no lda), Float32.
function onemklSspmv(device_queue, upper_lower, n, alpha, a, x, incx, beta,
                     y, incy)
    @ccall liboneapi_support.onemklSspmv(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{Cfloat},
        a::ZePtr{Cfloat}, x::ZePtr{Cfloat}, incx::Int64,
        beta::Ref{Cfloat}, y::ZePtr{Cfloat}, incy::Int64)::Cint
end

# SPMV, Float64.
function onemklDspmv(device_queue, upper_lower, n, alpha, a, x, incx, beta,
                     y, incy)
    @ccall liboneapi_support.onemklDspmv(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{Cdouble},
        a::ZePtr{Cdouble}, x::ZePtr{Cdouble}, incx::Int64,
        beta::Ref{Cdouble}, y::ZePtr{Cdouble}, incy::Int64)::Cint
end

# SPR (packed symmetric rank-1 update), Float32.
function onemklSspr(device_queue, upper_lower, n, alpha, x, incx, a)
    @ccall liboneapi_support.onemklSspr(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{Cfloat},
        x::ZePtr{Cfloat}, incx::Int64, a::ZePtr{Cfloat})::Cint
end

# SPR, Float64.
function onemklDspr(device_queue, upper_lower, n, alpha, x, incx, a)
    @ccall liboneapi_support.onemklDspr(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{Cdouble},
        x::ZePtr{Cdouble}, incx::Int64, a::ZePtr{Cdouble})::Cint
end

# SPR2, Float32 (ccall continues on the next source line).
function onemklSspr2(device_queue, upper_lower, n, alpha, x, incx, y, incy, a)
    @ccall
liboneapi_support.onemklSspr2(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{Cfloat},
        x::ZePtr{Cfloat}, incx::Int64, y::ZePtr{Cfloat}, incy::Int64,
        a::ZePtr{Cfloat})::Cint
    # (tail of onemklSspr2's @ccall)
end

# SPR2, Float64.
function onemklDspr2(device_queue, upper_lower, n, alpha, x, incx, y, incy, a)
    @ccall liboneapi_support.onemklDspr2(device_queue::syclQueue_t,
        upper_lower::onemklUplo, n::Int64, alpha::Ref{Cdouble},
        x::ZePtr{Cdouble}, incx::Int64, y::ZePtr{Cdouble}, incy::Int64,
        a::ZePtr{Cdouble})::Cint
end

# TBMV (triangular banded matrix-vector multiply), Float32.
function onemklStbmv(device_queue, upper_lower, trans, unit_diag, n, k, a,
                     lda, x, incx)
    @ccall liboneapi_support.onemklStbmv(device_queue::syclQueue_t,
        upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag,
        n::Int64, k::Int64, a::ZePtr{Cfloat}, lda::Int64,
        x::ZePtr{Cfloat}, incx::Int64)::Cint
end

# TBMV, Float64.
function onemklDtbmv(device_queue, upper_lower, trans, unit_diag, n, k, a,
                     lda, x, incx)
    @ccall liboneapi_support.onemklDtbmv(device_queue::syclQueue_t,
        upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag,
        n::Int64, k::Int64, a::ZePtr{Cdouble}, lda::Int64,
        x::ZePtr{Cdouble}, incx::Int64)::Cint
end

# TBMV, ComplexF32.
function onemklCtbmv(device_queue, upper_lower, trans, unit_diag, n, k, a,
                     lda, x, incx)
    @ccall liboneapi_support.onemklCtbmv(device_queue::syclQueue_t,
        upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag,
        n::Int64, k::Int64, a::ZePtr{ComplexF32}, lda::Int64,
        x::ZePtr{ComplexF32}, incx::Int64)::Cint
end

# TBMV, ComplexF64.
function onemklZtbmv(device_queue, upper_lower, trans, unit_diag, n, k, a,
                     lda, x, incx)
    @ccall liboneapi_support.onemklZtbmv(device_queue::syclQueue_t,
        upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag,
        n::Int64, k::Int64, a::ZePtr{ComplexF64}, lda::Int64,
        x::ZePtr{ComplexF64}, incx::Int64)::Cint
end

# TBSV (triangular banded solve), Float32 (ccall continues on the next
# source line; note the tbsv family takes host Ptr, unlike tbmv's ZePtr).
function onemklStbsv(device_queue, upper_lower, trans, unit_diag, n, k, a,
                     lda, x, incx)
    @ccall liboneapi_support.onemklStbsv(device_queue::syclQueue_t,
        upper_lower::onemklUplo, trans::onemklTranspose,
unit_diag::onemklDiag, n::Int64, k::Int64, a::Ptr{Cfloat}, lda::Int64, x::Ptr{Cfloat}, incx::Int64)::Cint end function onemklDtbsv(device_queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx) @ccall liboneapi_support.onemklDtbsv(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, n::Int64, k::Int64, a::Ptr{Cdouble}, lda::Int64, x::Ptr{Cdouble}, incx::Int64)::Cint end function onemklCtbsv(device_queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx) @ccall liboneapi_support.onemklCtbsv(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, n::Int64, k::Int64, a::Ptr{ComplexF32}, lda::Int64, x::Ptr{ComplexF32}, incx::Int64)::Cint end function onemklZtbsv(device_queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx) @ccall liboneapi_support.onemklZtbsv(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, n::Int64, k::Int64, a::Ptr{ComplexF32}, lda::Int64, x::Ptr{ComplexF32}, incx::Int64)::Cint end function onemklStpmv(device_queue, upper_lower, trans, unit_diag, n, a, x, incx) @ccall liboneapi_support.onemklStpmv(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, n::Int64, a::Ptr{Cfloat}, x::Ptr{Cfloat}, incx::Int64)::Cint end function onemklDtpmv(device_queue, upper_lower, trans, unit_diag, n, a, x, incx) @ccall liboneapi_support.onemklDtpmv(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, n::Int64, a::Ptr{Cdouble}, x::Ptr{Cdouble}, incx::Int64)::Cint end function onemklCtpmv(device_queue, upper_lower, trans, unit_diag, n, a, x, incx) @ccall liboneapi_support.onemklCtpmv(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, n::Int64, a::Ptr{ComplexF32}, x::Ptr{ComplexF32}, incx::Int64)::Cint end function onemklZtpmv(device_queue, upper_lower, trans, unit_diag, n, a, x, 
incx) @ccall liboneapi_support.onemklZtpmv(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, n::Int64, a::Ptr{ComplexF32}, x::Ptr{ComplexF32}, incx::Int64)::Cint end function onemklStpsv(device_queue, upper_lower, trans, unit_diag, n, a, x, incx) @ccall liboneapi_support.onemklStpsv(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, n::Int64, a::Ptr{Cfloat}, x::Ptr{Cfloat}, incx::Int64)::Cint end function onemklDtpsv(device_queue, upper_lower, trans, unit_diag, n, a, x, incx) @ccall liboneapi_support.onemklDtpsv(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, n::Int64, a::Ptr{Cdouble}, x::Ptr{Cdouble}, incx::Int64)::Cint end function onemklCtpsv(device_queue, upper_lower, trans, unit_diag, n, a, x, incx) @ccall liboneapi_support.onemklCtpsv(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, n::Int64, a::Ptr{ComplexF32}, x::Ptr{ComplexF32}, incx::Int64)::Cint end function onemklZtpsv(device_queue, upper_lower, trans, unit_diag, n, a, x, incx) @ccall liboneapi_support.onemklZtpsv(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, n::Int64, a::Ptr{ComplexF32}, x::Ptr{ComplexF32}, incx::Int64)::Cint end function onemklStrmv(device_queue, upper_lower, trans, unit_diag, n, a, lda, x, incx) @ccall liboneapi_support.onemklStrmv(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, n::Int64, a::ZePtr{Cfloat}, lda::Int64, x::ZePtr{Cfloat}, incx::Int64)::Cint end function onemklDtrmv(device_queue, upper_lower, trans, unit_diag, n, a, lda, x, incx) @ccall liboneapi_support.onemklDtrmv(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, n::Int64, a::ZePtr{Cdouble}, lda::Int64, x::ZePtr{Cdouble}, incx::Int64)::Cint end function 
onemklCtrmv(device_queue, upper_lower, trans, unit_diag, n, a, lda, x, incx) @ccall liboneapi_support.onemklCtrmv(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, n::Int64, a::ZePtr{ComplexF32}, lda::Int64, x::ZePtr{ComplexF32}, incx::Int64)::Cint end function onemklZtrmv(device_queue, upper_lower, trans, unit_diag, n, a, lda, x, incx) @ccall liboneapi_support.onemklZtrmv(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, n::Int64, a::ZePtr{ComplexF64}, lda::Int64, x::ZePtr{ComplexF64}, incx::Int64)::Cint end function onemklStrsv(device_queue, upper_lower, trans, unit_diag, n, a, lda, x, incx) @ccall liboneapi_support.onemklStrsv(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, n::Int64, a::ZePtr{Cfloat}, lda::Int64, x::ZePtr{Cfloat}, incx::Int64)::Cint end function onemklDtrsv(device_queue, upper_lower, trans, unit_diag, n, a, lda, x, incx) @ccall liboneapi_support.onemklDtrsv(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, n::Int64, a::ZePtr{Cdouble}, lda::Int64, x::ZePtr{Cdouble}, incx::Int64)::Cint end function onemklCtrsv(device_queue, upper_lower, trans, unit_diag, n, a, lda, x, incx) @ccall liboneapi_support.onemklCtrsv(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, n::Int64, a::ZePtr{ComplexF32}, lda::Int64, x::ZePtr{ComplexF32}, incx::Int64)::Cint end function onemklZtrsv(device_queue, upper_lower, trans, unit_diag, n, a, lda, x, incx) @ccall liboneapi_support.onemklZtrsv(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, n::Int64, a::ZePtr{ComplexF64}, lda::Int64, x::ZePtr{ComplexF64}, incx::Int64)::Cint end function onemklCdotc(device_queue, n, x, incx, y, incy, result) @ccall liboneapi_support.onemklCdotc(device_queue::syclQueue_t, n::Int64, 
x::ZePtr{ComplexF32}, incx::Int64, y::ZePtr{ComplexF32}, incy::Int64, result::RefOrZeRef{ComplexF32})::Cint end

# Conjugated/unconjugated complex dot products; `result` is RefOrZeRef, so the C side
# accepts either a host Ref or a Level Zero device pointer for the output scalar.
function onemklZdotc(device_queue, n, x, incx, y, incy, result)
    @ccall liboneapi_support.onemklZdotc(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF64}, incx::Int64, y::ZePtr{ComplexF64}, incy::Int64, result::RefOrZeRef{ComplexF64})::Cint
end

function onemklCdotu(device_queue, n, x, incx, y, incy, result)
    @ccall liboneapi_support.onemklCdotu(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF32}, incx::Int64, y::ZePtr{ComplexF32}, incy::Int64, result::RefOrZeRef{ComplexF32})::Cint
end

function onemklZdotu(device_queue, n, x, incx, y, incy, result)
    @ccall liboneapi_support.onemklZdotu(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF64}, incx::Int64, y::ZePtr{ComplexF64}, incy::Int64, result::RefOrZeRef{ComplexF64})::Cint
end

# iamax/iamin wrappers: the index result is written through a device pointer
# (ZePtr{Int64}); `base` selects the index base via the onemklIndex enum.
# The *_64 variants presumably map to the oneMKL 64-bit-integer entry points —
# here both take the same Int64 arguments; confirm against the C header.
function onemklSiamax(device_queue, n, x, incx, result, base)
    @ccall liboneapi_support.onemklSiamax(device_queue::syclQueue_t, n::Int64, x::ZePtr{Cfloat}, incx::Int64, result::ZePtr{Int64}, base::onemklIndex)::Cint
end

function onemklSiamax_64(device_queue, n, x, incx, result, base)
    @ccall liboneapi_support.onemklSiamax_64(device_queue::syclQueue_t, n::Int64, x::ZePtr{Cfloat}, incx::Int64, result::ZePtr{Int64}, base::onemklIndex)::Cint
end

function onemklDiamax(device_queue, n, x, incx, result, base)
    @ccall liboneapi_support.onemklDiamax(device_queue::syclQueue_t, n::Int64, x::ZePtr{Cdouble}, incx::Int64, result::ZePtr{Int64}, base::onemklIndex)::Cint
end

function onemklDiamax_64(device_queue, n, x, incx, result, base)
    @ccall liboneapi_support.onemklDiamax_64(device_queue::syclQueue_t, n::Int64, x::ZePtr{Cdouble}, incx::Int64, result::ZePtr{Int64}, base::onemklIndex)::Cint
end

function onemklCiamax(device_queue, n, x, incx, result, base)
    @ccall liboneapi_support.onemklCiamax(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF32}, incx::Int64, result::ZePtr{Int64}, base::onemklIndex)::Cint
end

function onemklCiamax_64(device_queue, n, x, incx, result, base)
    @ccall liboneapi_support.onemklCiamax_64(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF32}, incx::Int64, result::ZePtr{Int64}, base::onemklIndex)::Cint
end

function onemklZiamax(device_queue, n, x, incx, result, base)
    @ccall liboneapi_support.onemklZiamax(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF64}, incx::Int64, result::ZePtr{Int64}, base::onemklIndex)::Cint
end

function onemklZiamax_64(device_queue, n, x, incx, result, base)
    @ccall liboneapi_support.onemklZiamax_64(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF64}, incx::Int64, result::ZePtr{Int64}, base::onemklIndex)::Cint
end

function onemklSiamin(device_queue, n, x, incx, result, base)
    @ccall liboneapi_support.onemklSiamin(device_queue::syclQueue_t, n::Int64, x::ZePtr{Cfloat}, incx::Int64, result::ZePtr{Int64}, base::onemklIndex)::Cint
end

function onemklSiamin_64(device_queue, n, x, incx, result, base)
    @ccall liboneapi_support.onemklSiamin_64(device_queue::syclQueue_t, n::Int64, x::ZePtr{Cfloat}, incx::Int64, result::ZePtr{Int64}, base::onemklIndex)::Cint
end

function onemklDiamin(device_queue, n, x, incx, result, base)
    @ccall liboneapi_support.onemklDiamin(device_queue::syclQueue_t, n::Int64, x::ZePtr{Cdouble}, incx::Int64, result::ZePtr{Int64}, base::onemklIndex)::Cint
end

function onemklDiamin_64(device_queue, n, x, incx, result, base)
    @ccall liboneapi_support.onemklDiamin_64(device_queue::syclQueue_t, n::Int64, x::ZePtr{Cdouble}, incx::Int64, result::ZePtr{Int64}, base::onemklIndex)::Cint
end

function onemklCiamin(device_queue, n, x, incx, result, base)
    @ccall liboneapi_support.onemklCiamin(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF32}, incx::Int64, result::ZePtr{Int64}, base::onemklIndex)::Cint
end

function onemklCiamin_64(device_queue, n, x, incx, result, base)
    @ccall liboneapi_support.onemklCiamin_64(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF32}, incx::Int64, result::ZePtr{Int64}, base::onemklIndex)::Cint
end

function onemklZiamin(device_queue, n, x, incx, result, base)
    @ccall liboneapi_support.onemklZiamin(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF64}, incx::Int64, result::ZePtr{Int64}, base::onemklIndex)::Cint
end

function onemklZiamin_64(device_queue, n, x, incx, result, base)
    @ccall liboneapi_support.onemklZiamin_64(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF64}, incx::Int64, result::ZePtr{Int64}, base::onemklIndex)::Cint
end

# asum: for complex inputs the result is the element type's real counterpart
# (Cfloat for ComplexF32, Float64 for ComplexF64), written to device memory.
function onemklSasum(device_queue, n, x, incx, result)
    @ccall liboneapi_support.onemklSasum(device_queue::syclQueue_t, n::Int64, x::ZePtr{Cfloat}, incx::Int64, result::ZePtr{Cfloat})::Cint
end

function onemklDasum(device_queue, n, x, incx, result)
    @ccall liboneapi_support.onemklDasum(device_queue::syclQueue_t, n::Int64, x::ZePtr{Cdouble}, incx::Int64, result::ZePtr{Cdouble})::Cint
end

function onemklCasum(device_queue, n, x, incx, result)
    @ccall liboneapi_support.onemklCasum(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF32}, incx::Int64, result::ZePtr{Cfloat})::Cint
end

function onemklZasum(device_queue, n, x, incx, result)
    @ccall liboneapi_support.onemklZasum(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF64}, incx::Int64, result::ZePtr{Float64})::Cint
end

# axpy: y .= alpha*x .+ y on the device; the H variant handles Float16 data.
function onemklHaxpy(device_queue, n, alpha, x, incx, y, incy)
    @ccall liboneapi_support.onemklHaxpy(device_queue::syclQueue_t, n::Int64, alpha::Ref{Float16}, x::ZePtr{Float16}, incx::Int64, y::ZePtr{Float16}, incy::Int64)::Cint
end

function onemklSaxpy(device_queue, n, alpha, x, incx, y, incy)
    @ccall liboneapi_support.onemklSaxpy(device_queue::syclQueue_t, n::Int64, alpha::Ref{Cfloat}, x::ZePtr{Cfloat}, incx::Int64, y::ZePtr{Cfloat}, incy::Int64)::Cint
end

function onemklDaxpy(device_queue, n, alpha, x, incx, y, incy)
    @ccall liboneapi_support.onemklDaxpy(device_queue::syclQueue_t, n::Int64, alpha::Ref{Cdouble}, x::ZePtr{Cdouble}, incx::Int64, y::ZePtr{Cdouble}, incy::Int64)::Cint
end

function onemklCaxpy(device_queue, n,
alpha, x, incx, y, incy)
    @ccall liboneapi_support.onemklCaxpy(device_queue::syclQueue_t, n::Int64, alpha::Ref{ComplexF32}, x::ZePtr{ComplexF32}, incx::Int64, y::ZePtr{ComplexF32}, incy::Int64)::Cint
end

# Level-1 BLAS wrappers (axpy/axpby/copy/dot/nrm2/rot/rotg/rotm/scal/swap) forwarded
# to the C support library via @ccall. Scalars are passed by host Ref, vectors as
# device pointers (ZePtr); rotg/rotm take plain Ptr, matching their declarations here.
function onemklZaxpy(device_queue, n, alpha, x, incx, y, incy)
    @ccall liboneapi_support.onemklZaxpy(device_queue::syclQueue_t, n::Int64, alpha::Ref{ComplexF64}, x::ZePtr{ComplexF64}, incx::Int64, y::ZePtr{ComplexF64}, incy::Int64)::Cint
end

function onemklSaxpby(device_queue, n, alpha, x, incx, beta, y, incy)
    @ccall liboneapi_support.onemklSaxpby(device_queue::syclQueue_t, n::Int64, alpha::Ref{Cfloat}, x::ZePtr{Cfloat}, incx::Int64, beta::Ref{Cfloat}, y::ZePtr{Cfloat}, incy::Int64)::Cint
end

function onemklDaxpby(device_queue, n, alpha, x, incx, beta, y, incy)
    @ccall liboneapi_support.onemklDaxpby(device_queue::syclQueue_t, n::Int64, alpha::Ref{Cdouble}, x::ZePtr{Cdouble}, incx::Int64, beta::Ref{Cdouble}, y::ZePtr{Cdouble}, incy::Int64)::Cint
end

function onemklCaxpby(device_queue, n, alpha, x, incx, beta, y, incy)
    @ccall liboneapi_support.onemklCaxpby(device_queue::syclQueue_t, n::Int64, alpha::Ref{ComplexF32}, x::ZePtr{ComplexF32}, incx::Int64, beta::Ref{ComplexF32}, y::ZePtr{ComplexF32}, incy::Int64)::Cint
end

function onemklZaxpby(device_queue, n, alpha, x, incx, beta, y, incy)
    @ccall liboneapi_support.onemklZaxpby(device_queue::syclQueue_t, n::Int64, alpha::Ref{ComplexF64}, x::ZePtr{ComplexF64}, incx::Int64, beta::Ref{ComplexF64}, y::ZePtr{ComplexF64}, incy::Int64)::Cint
end

function onemklScopy(device_queue, n, x, incx, y, incy)
    @ccall liboneapi_support.onemklScopy(device_queue::syclQueue_t, n::Int64, x::ZePtr{Cfloat}, incx::Int64, y::ZePtr{Cfloat}, incy::Int64)::Cint
end

function onemklDcopy(device_queue, n, x, incx, y, incy)
    @ccall liboneapi_support.onemklDcopy(device_queue::syclQueue_t, n::Int64, x::ZePtr{Cdouble}, incx::Int64, y::ZePtr{Cdouble}, incy::Int64)::Cint
end

function onemklCcopy(device_queue, n, x, incx, y, incy)
    @ccall liboneapi_support.onemklCcopy(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF32}, incx::Int64, y::ZePtr{ComplexF32}, incy::Int64)::Cint
end

function onemklZcopy(device_queue, n, x, incx, y, incy)
    @ccall liboneapi_support.onemklZcopy(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF64}, incx::Int64, y::ZePtr{ComplexF64}, incy::Int64)::Cint
end

function onemklHdot(device_queue, n, x, incx, y, incy, result)
    @ccall liboneapi_support.onemklHdot(device_queue::syclQueue_t, n::Int64, x::ZePtr{Float16}, incx::Int64, y::ZePtr{Float16}, incy::Int64, result::RefOrZeRef{Float16})::Cint
end

function onemklSdot(device_queue, n, x, incx, y, incy, result)
    @ccall liboneapi_support.onemklSdot(device_queue::syclQueue_t, n::Int64, x::ZePtr{Cfloat}, incx::Int64, y::ZePtr{Cfloat}, incy::Int64, result::RefOrZeRef{Cfloat})::Cint
end

function onemklDdot(device_queue, n, x, incx, y, incy, result)
    @ccall liboneapi_support.onemklDdot(device_queue::syclQueue_t, n::Int64, x::ZePtr{Cdouble}, incx::Int64, y::ZePtr{Cdouble}, incy::Int64, result::RefOrZeRef{Cdouble})::Cint
end

function onemklSsdsdot(device_queue, n, sb, x, incx, y, incy, result)
    @ccall liboneapi_support.onemklSsdsdot(device_queue::syclQueue_t, n::Int64, sb::Ref{Float32}, x::ZePtr{Float32}, incx::Int64, y::ZePtr{Float32}, incy::Int64, result::Ref{Float32})::Cint
end

function onemklHnrm2(device_queue, n, x, incx, result)
    @ccall liboneapi_support.onemklHnrm2(device_queue::syclQueue_t, n::Int64, x::ZePtr{Float16}, incx::Int64, result::RefOrZeRef{Float16})::Cint
end

function onemklSnrm2(device_queue, n, x, incx, result)
    @ccall liboneapi_support.onemklSnrm2(device_queue::syclQueue_t, n::Int64, x::ZePtr{Cfloat}, incx::Int64, result::RefOrZeRef{Cfloat})::Cint
end

function onemklDnrm2(device_queue, n, x, incx, result)
    @ccall liboneapi_support.onemklDnrm2(device_queue::syclQueue_t, n::Int64, x::ZePtr{Cdouble}, incx::Int64, result::RefOrZeRef{Cdouble})::Cint
end

function onemklCnrm2(device_queue, n, x, incx, result)
    @ccall liboneapi_support.onemklCnrm2(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF32}, incx::Int64, result::RefOrZeRef{Cfloat})::Cint
end

function onemklZnrm2(device_queue, n, x, incx, result)
    @ccall liboneapi_support.onemklZnrm2(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF64}, incx::Int64, result::RefOrZeRef{Cdouble})::Cint
end

# Givens rotations: CSrot/ZDrot take real c and s; Crot/Zrot take a complex s.
function onemklHrot(device_queue, n, x, incx, y, incy, c, s)
    @ccall liboneapi_support.onemklHrot(device_queue::syclQueue_t, n::Int64, x::ZePtr{Float16}, incx::Int64, y::ZePtr{Float16}, incy::Int64, c::Ref{Float16}, s::Ref{Float16})::Cint
end

function onemklSrot(device_queue, n, x, incx, y, incy, c, s)
    @ccall liboneapi_support.onemklSrot(device_queue::syclQueue_t, n::Int64, x::ZePtr{Cfloat}, incx::Int64, y::ZePtr{Cfloat}, incy::Int64, c::Ref{Cfloat}, s::Ref{Cfloat})::Cint
end

function onemklDrot(device_queue, n, x, incx, y, incy, c, s)
    @ccall liboneapi_support.onemklDrot(device_queue::syclQueue_t, n::Int64, x::ZePtr{Cdouble}, incx::Int64, y::ZePtr{Cdouble}, incy::Int64, c::Ref{Cdouble}, s::Ref{Cdouble})::Cint
end

function onemklCSrot(device_queue, n, x, incx, y, incy, c, s)
    @ccall liboneapi_support.onemklCSrot(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF32}, incx::Int64, y::ZePtr{ComplexF32}, incy::Int64, c::Ref{Float32}, s::Ref{Float32})::Cint
end

function onemklCrot(device_queue, n, x, incx, y, incy, c, s)
    @ccall liboneapi_support.onemklCrot(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF32}, incx::Int64, y::ZePtr{ComplexF32}, incy::Int64, c::Ref{Float32}, s::Ref{ComplexF32})::Cint
end

function onemklZDrot(device_queue, n, x, incx, y, incy, c, s)
    @ccall liboneapi_support.onemklZDrot(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF64}, incx::Int64, y::ZePtr{ComplexF64}, incy::Int64, c::Ref{Float64}, s::Ref{Float64})::Cint
end

function onemklZrot(device_queue, n, x, incx, y, incy, c, s)
    @ccall liboneapi_support.onemklZrot(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF64}, incx::Int64, y::ZePtr{ComplexF64}, incy::Int64, c::Ref{Float64}, s::Ref{ComplexF64})::Cint
end

function onemklSrotg(device_queue, a, b, c, s)
    @ccall liboneapi_support.onemklSrotg(device_queue::syclQueue_t, a::Ptr{Cfloat}, b::Ptr{Cfloat}, c::Ptr{Cfloat}, s::Ptr{Cfloat})::Cint
end

function onemklDrotg(device_queue, a, b, c, s)
    @ccall liboneapi_support.onemklDrotg(device_queue::syclQueue_t, a::Ptr{Cdouble}, b::Ptr{Cdouble}, c::Ptr{Cdouble}, s::Ptr{Cdouble})::Cint
end

function onemklCrotg(device_queue, a, b, c, s)
    @ccall liboneapi_support.onemklCrotg(device_queue::syclQueue_t, a::Ptr{ComplexF32}, b::Ptr{ComplexF32}, c::Ptr{Cfloat}, s::Ptr{ComplexF32})::Cint
end

# FIX(review): onemklZrotg previously declared a/b/s as Ptr{ComplexF32} while c was
# already Ptr{Cdouble} — internally inconsistent for the double-complex Z variant.
# Corrected a/b/s to Ptr{ComplexF64}.
function onemklZrotg(device_queue, a, b, c, s)
    @ccall liboneapi_support.onemklZrotg(device_queue::syclQueue_t, a::Ptr{ComplexF64}, b::Ptr{ComplexF64}, c::Ptr{Cdouble}, s::Ptr{ComplexF64})::Cint
end

function onemklSrotm(device_queue, n, x, incx, y, incy, param)
    @ccall liboneapi_support.onemklSrotm(device_queue::syclQueue_t, n::Int64, x::Ptr{Cfloat}, incx::Int64, y::Ptr{Cfloat}, incy::Int64, param::Ptr{Cfloat})::Cint
end

function onemklDrotm(device_queue, n, x, incx, y, incy, param)
    @ccall liboneapi_support.onemklDrotm(device_queue::syclQueue_t, n::Int64, x::Ptr{Cdouble}, incx::Int64, y::Ptr{Cdouble}, incy::Int64, param::Ptr{Cdouble})::Cint
end

function onemklSrotmg(device_queue, d1, d2, x1, y1, param)
    @ccall liboneapi_support.onemklSrotmg(device_queue::syclQueue_t, d1::Ptr{Cfloat}, d2::Ptr{Cfloat}, x1::Ptr{Cfloat}, y1::Ptr{Cfloat}, param::Ptr{Cfloat})::Cint
end

function onemklDrotmg(device_queue, d1, d2, x1, y1, param)
    @ccall liboneapi_support.onemklDrotmg(device_queue::syclQueue_t, d1::Ptr{Cdouble}, d2::Ptr{Cdouble}, x1::Ptr{Cdouble}, y1::Ptr{Cdouble}, param::Ptr{Cdouble})::Cint
end

# scal: CSscal/ZDscal scale a complex vector by a real scalar.
function onemklHscal(device_queue, n, alpha, x, incx)
    @ccall liboneapi_support.onemklHscal(device_queue::syclQueue_t, n::Int64, alpha::Ref{Float16}, x::ZePtr{Float16}, incx::Int64)::Cint
end

function onemklSscal(device_queue, n, alpha, x, incx)
    @ccall liboneapi_support.onemklSscal(device_queue::syclQueue_t, n::Int64, alpha::Ref{Cfloat}, x::ZePtr{Cfloat}, incx::Int64)::Cint
end

function onemklDscal(device_queue, n, alpha, x, incx)
    @ccall liboneapi_support.onemklDscal(device_queue::syclQueue_t, n::Int64, alpha::Ref{Cdouble}, x::ZePtr{Cdouble}, incx::Int64)::Cint
end

function onemklCSscal(device_queue, n, alpha, x, incx)
    @ccall liboneapi_support.onemklCSscal(device_queue::syclQueue_t, n::Int64, alpha::Ref{Float32}, x::ZePtr{ComplexF32}, incx::Int64)::Cint
end

function onemklZDscal(device_queue, n, alpha, x, incx)
    @ccall liboneapi_support.onemklZDscal(device_queue::syclQueue_t, n::Int64, alpha::Ref{Float64}, x::ZePtr{ComplexF64}, incx::Int64)::Cint
end

function onemklCscal(device_queue, n, alpha, x, incx)
    @ccall liboneapi_support.onemklCscal(device_queue::syclQueue_t, n::Int64, alpha::Ref{ComplexF32}, x::ZePtr{ComplexF32}, incx::Int64)::Cint
end

function onemklZscal(device_queue, n, alpha, x, incx)
    @ccall liboneapi_support.onemklZscal(device_queue::syclQueue_t, n::Int64, alpha::Ref{ComplexF64}, x::ZePtr{ComplexF64}, incx::Int64)::Cint
end

function onemklSswap(device_queue, n, x, incx, y, incy)
    @ccall liboneapi_support.onemklSswap(device_queue::syclQueue_t, n::Int64, x::ZePtr{Cfloat}, incx::Int64, y::ZePtr{Cfloat}, incy::Int64)::Cint
end

function onemklDswap(device_queue, n, x, incx, y, incy)
    @ccall liboneapi_support.onemklDswap(device_queue::syclQueue_t, n::Int64, x::ZePtr{Cdouble}, incx::Int64, y::ZePtr{Cdouble}, incy::Int64)::Cint
end

function onemklCswap(device_queue, n, x, incx, y, incy)
    @ccall liboneapi_support.onemklCswap(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF32}, incx::Int64, y::ZePtr{ComplexF32}, incy::Int64)::Cint
end

function onemklZswap(device_queue, n, x, incx, y, incy)
    @ccall liboneapi_support.onemklZswap(device_queue::syclQueue_t, n::Int64, x::ZePtr{ComplexF64}, incx::Int64, y::ZePtr{ComplexF64}, incy::Int64)::Cint
end

function onemklHgemm_batch_strided(device_queue, transa,
transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size)
    @ccall liboneapi_support.onemklHgemm_batch_strided(device_queue::syclQueue_t, transa::onemklTranspose, transb::onemklTranspose, m::Int64, n::Int64, k::Int64, alpha::Ref{Float16}, a::ZePtr{Float16}, lda::Int64, stride_a::Int64, b::ZePtr{Float16}, ldb::Int64, stride_b::Int64, beta::Ref{Float16}, c::ZePtr{Float16}, ldc::Int64, stride_c::Int64, batch_size::Int64)::Cint
end

# Strided-batch BLAS wrappers. The gemm family passes scalars by Ref and arrays as
# ZePtr; the syrk/trsm/gemv/dgmm families below use plain Ptr throughout — an
# inconsistency presumably inherited from the C header; confirm on regeneration.
function onemklSgemm_batch_strided(device_queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size)
    @ccall liboneapi_support.onemklSgemm_batch_strided(device_queue::syclQueue_t, transa::onemklTranspose, transb::onemklTranspose, m::Int64, n::Int64, k::Int64, alpha::Ref{Cfloat}, a::ZePtr{Cfloat}, lda::Int64, stride_a::Int64, b::ZePtr{Cfloat}, ldb::Int64, stride_b::Int64, beta::Ref{Cfloat}, c::ZePtr{Cfloat}, ldc::Int64, stride_c::Int64, batch_size::Int64)::Cint
end

function onemklDgemm_batch_strided(device_queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size)
    @ccall liboneapi_support.onemklDgemm_batch_strided(device_queue::syclQueue_t, transa::onemklTranspose, transb::onemklTranspose, m::Int64, n::Int64, k::Int64, alpha::Ref{Cdouble}, a::ZePtr{Cdouble}, lda::Int64, stride_a::Int64, b::ZePtr{Cdouble}, ldb::Int64, stride_b::Int64, beta::Ref{Cdouble}, c::ZePtr{Cdouble}, ldc::Int64, stride_c::Int64, batch_size::Int64)::Cint
end

function onemklCgemm_batch_strided(device_queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size)
    @ccall liboneapi_support.onemklCgemm_batch_strided(device_queue::syclQueue_t, transa::onemklTranspose, transb::onemklTranspose, m::Int64, n::Int64, k::Int64, alpha::Ref{ComplexF32}, a::ZePtr{ComplexF32}, lda::Int64, stride_a::Int64, b::ZePtr{ComplexF32}, ldb::Int64, stride_b::Int64, beta::Ref{ComplexF32}, c::ZePtr{ComplexF32}, ldc::Int64, stride_c::Int64, batch_size::Int64)::Cint
end

function onemklZgemm_batch_strided(device_queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size)
    @ccall liboneapi_support.onemklZgemm_batch_strided(device_queue::syclQueue_t, transa::onemklTranspose, transb::onemklTranspose, m::Int64, n::Int64, k::Int64, alpha::Ref{ComplexF64}, a::ZePtr{ComplexF64}, lda::Int64, stride_a::Int64, b::ZePtr{ComplexF64}, ldb::Int64, stride_b::Int64, beta::Ref{ComplexF64}, c::ZePtr{ComplexF64}, ldc::Int64, stride_c::Int64, batch_size::Int64)::Cint
end

function onemklSsyrk_batch_strided(device_queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size)
    @ccall liboneapi_support.onemklSsyrk_batch_strided(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, n::Int64, k::Int64, alpha::Ptr{Cfloat}, a::Ptr{Cfloat}, lda::Int64, stride_a::Int64, beta::Ptr{Cfloat}, c::Ptr{Cfloat}, ldc::Int64, stride_c::Int64, batch_size::Int64)::Cint
end

function onemklDsyrk_batch_strided(device_queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size)
    @ccall liboneapi_support.onemklDsyrk_batch_strided(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, n::Int64, k::Int64, alpha::Ptr{Cdouble}, a::Ptr{Cdouble}, lda::Int64, stride_a::Int64, beta::Ptr{Cdouble}, c::Ptr{Cdouble}, ldc::Int64, stride_c::Int64, batch_size::Int64)::Cint
end

function onemklCsyrk_batch_strided(device_queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size)
    @ccall liboneapi_support.onemklCsyrk_batch_strided(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, n::Int64, k::Int64, alpha::Ptr{ComplexF32}, a::Ptr{ComplexF32}, lda::Int64, stride_a::Int64, beta::Ptr{ComplexF32}, c::Ptr{ComplexF32}, ldc::Int64, stride_c::Int64, batch_size::Int64)::Cint
end

# FIX(review): Z variant was declared with Ptr{ComplexF32}; corrected to Ptr{ComplexF64}
# (compare onemklZgemm_batch_strided above, which correctly uses ComplexF64).
function onemklZsyrk_batch_strided(device_queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size)
    @ccall liboneapi_support.onemklZsyrk_batch_strided(device_queue::syclQueue_t, upper_lower::onemklUplo, trans::onemklTranspose, n::Int64, k::Int64, alpha::Ptr{ComplexF64}, a::Ptr{ComplexF64}, lda::Int64, stride_a::Int64, beta::Ptr{ComplexF64}, c::Ptr{ComplexF64}, ldc::Int64, stride_c::Int64, batch_size::Int64)::Cint
end

function onemklStrsm_batch_strided(device_queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size)
    @ccall liboneapi_support.onemklStrsm_batch_strided(device_queue::syclQueue_t, left_right::onemklSide, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, m::Int64, n::Int64, alpha::Ptr{Cfloat}, a::Ptr{Cfloat}, lda::Int64, stride_a::Int64, b::Ptr{Cfloat}, ldb::Int64, stride_b::Int64, batch_size::Int64)::Cint
end

function onemklDtrsm_batch_strided(device_queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size)
    @ccall liboneapi_support.onemklDtrsm_batch_strided(device_queue::syclQueue_t, left_right::onemklSide, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, m::Int64, n::Int64, alpha::Ptr{Cdouble}, a::Ptr{Cdouble}, lda::Int64, stride_a::Int64, b::Ptr{Cdouble}, ldb::Int64, stride_b::Int64, batch_size::Int64)::Cint
end

function onemklCtrsm_batch_strided(device_queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size)
    @ccall liboneapi_support.onemklCtrsm_batch_strided(device_queue::syclQueue_t, left_right::onemklSide, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, m::Int64, n::Int64, alpha::Ptr{ComplexF32}, a::Ptr{ComplexF32}, lda::Int64, stride_a::Int64, b::Ptr{ComplexF32}, ldb::Int64, stride_b::Int64, batch_size::Int64)::Cint
end

# FIX(review): Z variant was declared with Ptr{ComplexF32}; corrected to Ptr{ComplexF64}.
function onemklZtrsm_batch_strided(device_queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size)
    @ccall liboneapi_support.onemklZtrsm_batch_strided(device_queue::syclQueue_t, left_right::onemklSide, upper_lower::onemklUplo, trans::onemklTranspose, unit_diag::onemklDiag, m::Int64, n::Int64, alpha::Ptr{ComplexF64}, a::Ptr{ComplexF64}, lda::Int64, stride_a::Int64, b::Ptr{ComplexF64}, ldb::Int64, stride_b::Int64, batch_size::Int64)::Cint
end

function onemklSgemv_batch_strided(device_queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size)
    @ccall liboneapi_support.onemklSgemv_batch_strided(device_queue::syclQueue_t, trans::onemklTranspose, m::Int64, n::Int64, alpha::Ptr{Cfloat}, a::Ptr{Cfloat}, lda::Int64, stridea::Int64, x::Ptr{Cfloat}, incx::Int64, stridex::Int64, beta::Ptr{Cfloat}, y::Ptr{Cfloat}, incy::Int64, stridey::Int64, batch_size::Int64)::Cint
end

function onemklDgemv_batch_strided(device_queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size)
    @ccall liboneapi_support.onemklDgemv_batch_strided(device_queue::syclQueue_t, trans::onemklTranspose, m::Int64, n::Int64, alpha::Ptr{Cdouble}, a::Ptr{Cdouble}, lda::Int64, stridea::Int64, x::Ptr{Cdouble}, incx::Int64, stridex::Int64, beta::Ptr{Cdouble}, y::Ptr{Cdouble}, incy::Int64, stridey::Int64, batch_size::Int64)::Cint
end

function onemklCgemv_batch_strided(device_queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size)
    @ccall liboneapi_support.onemklCgemv_batch_strided(device_queue::syclQueue_t, trans::onemklTranspose, m::Int64, n::Int64, alpha::Ptr{ComplexF32}, a::Ptr{ComplexF32}, lda::Int64, stridea::Int64, x::Ptr{ComplexF32}, incx::Int64, stridex::Int64, beta::Ptr{ComplexF32}, y::Ptr{ComplexF32}, incy::Int64, stridey::Int64, batch_size::Int64)::Cint
end

# FIX(review): Z variant was declared with Ptr{ComplexF32}; corrected to Ptr{ComplexF64}.
function onemklZgemv_batch_strided(device_queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size)
    @ccall liboneapi_support.onemklZgemv_batch_strided(device_queue::syclQueue_t, trans::onemklTranspose, m::Int64, n::Int64, alpha::Ptr{ComplexF64}, a::Ptr{ComplexF64}, lda::Int64, stridea::Int64, x::Ptr{ComplexF64}, incx::Int64, stridex::Int64, beta::Ptr{ComplexF64}, y::Ptr{ComplexF64}, incy::Int64, stridey::Int64, batch_size::Int64)::Cint
end

function onemklSdgmm_batch_strided(device_queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size)
    @ccall liboneapi_support.onemklSdgmm_batch_strided(device_queue::syclQueue_t, left_right::onemklSide, m::Int64, n::Int64, a::Ptr{Cfloat}, lda::Int64, stridea::Int64, x::Ptr{Cfloat}, incx::Int64, stridex::Int64, c::Ptr{Cfloat}, ldc::Int64, stridec::Int64, batch_size::Int64)::Cint
end

function onemklDdgmm_batch_strided(device_queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size)
    @ccall liboneapi_support.onemklDdgmm_batch_strided(device_queue::syclQueue_t, left_right::onemklSide, m::Int64, n::Int64, a::Ptr{Cdouble}, lda::Int64, stridea::Int64, x::Ptr{Cdouble}, incx::Int64, stridex::Int64, c::Ptr{Cdouble}, ldc::Int64, stridec::Int64, batch_size::Int64)::Cint
end

function onemklCdgmm_batch_strided(device_queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size)
    @ccall liboneapi_support.onemklCdgmm_batch_strided(device_queue::syclQueue_t, left_right::onemklSide, m::Int64, n::Int64, a::Ptr{ComplexF32}, lda::Int64, stridea::Int64, x::Ptr{ComplexF32}, incx::Int64, stridex::Int64, c::Ptr{ComplexF32}, ldc::Int64, stridec::Int64, batch_size::Int64)::Cint
end

# FIX(review): Z variant was declared with Ptr{ComplexF32}; corrected to Ptr{ComplexF64}.
function onemklZdgmm_batch_strided(device_queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size)
    @ccall liboneapi_support.onemklZdgmm_batch_strided(device_queue::syclQueue_t, left_right::onemklSide, m::Int64, n::Int64, a::Ptr{ComplexF64}, lda::Int64, stridea::Int64, x::Ptr{ComplexF64}, incx::Int64, stridex::Int64, c::Ptr{ComplexF64}, ldc::Int64, stridec::Int64,
batch_size::Int64)::Cint end function onemklSaxpy_batch_strided(device_queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size) @ccall liboneapi_support.onemklSaxpy_batch_strided(device_queue::syclQueue_t, n::Int64, alpha::Ptr{Cfloat}, x::Ptr{Cfloat}, incx::Int64, stridex::Int64, y::Ptr{Cfloat}, incy::Int64, stridey::Int64, batch_size::Int64)::Cint end function onemklDaxpy_batch_strided(device_queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size) @ccall liboneapi_support.onemklDaxpy_batch_strided(device_queue::syclQueue_t, n::Int64, alpha::Ptr{Cdouble}, x::Ptr{Cdouble}, incx::Int64, stridex::Int64, y::Ptr{Cdouble}, incy::Int64, stridey::Int64, batch_size::Int64)::Cint end function onemklCaxpy_batch_strided(device_queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size) @ccall liboneapi_support.onemklCaxpy_batch_strided(device_queue::syclQueue_t, n::Int64, alpha::Ptr{ComplexF32}, x::Ptr{ComplexF32}, incx::Int64, stridex::Int64, y::Ptr{ComplexF32}, incy::Int64, stridey::Int64, batch_size::Int64)::Cint end function onemklZaxpy_batch_strided(device_queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size) @ccall liboneapi_support.onemklZaxpy_batch_strided(device_queue::syclQueue_t, n::Int64, alpha::Ptr{ComplexF32}, x::Ptr{ComplexF32}, incx::Int64, stridex::Int64, y::Ptr{ComplexF32}, incy::Int64, stridey::Int64, batch_size::Int64)::Cint end function onemklScopy_batch_strided(device_queue, n, x, incx, stridex, y, incy, stridey, batch_size) @ccall liboneapi_support.onemklScopy_batch_strided(device_queue::syclQueue_t, n::Int64, x::Ptr{Cfloat}, incx::Int64, stridex::Int64, y::Ptr{Cfloat}, incy::Int64, stridey::Int64, batch_size::Int64)::Cint end function onemklDcopy_batch_strided(device_queue, n, x, incx, stridex, y, incy, stridey, batch_size) @ccall liboneapi_support.onemklDcopy_batch_strided(device_queue::syclQueue_t, n::Int64, x::Ptr{Cdouble}, incx::Int64, stridex::Int64, y::Ptr{Cdouble}, incy::Int64, stridey::Int64, 
batch_size::Int64)::Cint end function onemklCcopy_batch_strided(device_queue, n, x, incx, stridex, y, incy, stridey, batch_size) @ccall liboneapi_support.onemklCcopy_batch_strided(device_queue::syclQueue_t, n::Int64, x::Ptr{ComplexF32}, incx::Int64, stridex::Int64, y::Ptr{ComplexF32}, incy::Int64, stridey::Int64, batch_size::Int64)::Cint end function onemklZcopy_batch_strided(device_queue, n, x, incx, stridex, y, incy, stridey, batch_size) @ccall liboneapi_support.onemklZcopy_batch_strided(device_queue::syclQueue_t, n::Int64, x::Ptr{ComplexF32}, incx::Int64, stridex::Int64, y::Ptr{ComplexF32}, incy::Int64, stridey::Int64, batch_size::Int64)::Cint end function onemklSgemmt(device_queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc) @ccall liboneapi_support.onemklSgemmt(device_queue::syclQueue_t, upper_lower::onemklUplo, transa::onemklTranspose, transb::onemklTranspose, n::Int64, k::Int64, alpha::Ptr{Cfloat}, a::Ptr{Cfloat}, lda::Int64, b::Ptr{Cfloat}, ldb::Int64, beta::Ptr{Cfloat}, c::Ptr{Cfloat}, ldc::Int64)::Cint end function onemklDgemmt(device_queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc) @ccall liboneapi_support.onemklDgemmt(device_queue::syclQueue_t, upper_lower::onemklUplo, transa::onemklTranspose, transb::onemklTranspose, n::Int64, k::Int64, alpha::Ptr{Cdouble}, a::Ptr{Cdouble}, lda::Int64, b::Ptr{Cdouble}, ldb::Int64, beta::Ptr{Cdouble}, c::Ptr{Cdouble}, ldc::Int64)::Cint end function onemklCgemmt(device_queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc) @ccall liboneapi_support.onemklCgemmt(device_queue::syclQueue_t, upper_lower::onemklUplo, transa::onemklTranspose, transb::onemklTranspose, n::Int64, k::Int64, alpha::Ptr{ComplexF32}, a::Ptr{ComplexF32}, lda::Int64, b::Ptr{ComplexF32}, ldb::Int64, beta::Ptr{ComplexF32}, c::Ptr{ComplexF32}, ldc::Int64)::Cint end function onemklZgemmt(device_queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc) 
@ccall liboneapi_support.onemklZgemmt(device_queue::syclQueue_t,
        upper_lower::onemklUplo, transa::onemklTranspose, transb::onemklTranspose,
        n::Int64, k::Int64, alpha::Ptr{ComplexF64}, a::Ptr{ComplexF64}, lda::Int64,
        b::Ptr{ComplexF64}, ldb::Int64, beta::Ptr{ComplexF64}, c::Ptr{ComplexF64},
        ldc::Int64)::Cint
    # FIX above: Z (double-precision complex) variant must marshal ComplexF64
    # (was ComplexF32), consistent with the other onemklZ* wrappers in this file.
end

# In-place scaled matrix copy / transposition (`ab` is both input and output).
function onemklSimatcopy(device_queue, trans, m, n, alpha, ab, lda, ldb)
    @ccall liboneapi_support.onemklSimatcopy(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, alpha::Ptr{Cfloat}, ab::Ptr{Cfloat},
        lda::Int64, ldb::Int64)::Cint
end

function onemklDimatcopy(device_queue, trans, m, n, alpha, ab, lda, ldb)
    @ccall liboneapi_support.onemklDimatcopy(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, alpha::Ptr{Cdouble}, ab::Ptr{Cdouble},
        lda::Int64, ldb::Int64)::Cint
end

function onemklCimatcopy(device_queue, trans, m, n, alpha, ab, lda, ldb)
    @ccall liboneapi_support.onemklCimatcopy(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, alpha::Ptr{ComplexF32},
        ab::Ptr{ComplexF32}, lda::Int64, ldb::Int64)::Cint
end

# FIX: Z variant must marshal ComplexF64 (was ComplexF32).
function onemklZimatcopy(device_queue, trans, m, n, alpha, ab, lda, ldb)
    @ccall liboneapi_support.onemklZimatcopy(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, alpha::Ptr{ComplexF64},
        ab::Ptr{ComplexF64}, lda::Int64, ldb::Int64)::Cint
end

# Out-of-place scaled matrix copy / transposition (b := op(alpha*a)).
function onemklSomatcopy(device_queue, trans, m, n, alpha, a, lda, b, ldb)
    @ccall liboneapi_support.onemklSomatcopy(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, alpha::Ptr{Cfloat}, a::Ptr{Cfloat},
        lda::Int64, b::Ptr{Cfloat}, ldb::Int64)::Cint
end

function onemklDomatcopy(device_queue, trans, m, n, alpha, a, lda, b, ldb)
    @ccall liboneapi_support.onemklDomatcopy(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, alpha::Ptr{Cdouble}, a::Ptr{Cdouble},
        lda::Int64, b::Ptr{Cdouble}, ldb::Int64)::Cint
end

function onemklComatcopy(device_queue, trans, m, n, alpha, a, lda, b, ldb)
    @ccall liboneapi_support.onemklComatcopy(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, alpha::Ptr{ComplexF32},
        a::Ptr{ComplexF32}, lda::Int64, b::Ptr{ComplexF32}, ldb::Int64)::Cint
end

# FIX: Z variant must marshal ComplexF64 (was ComplexF32).
function onemklZomatcopy(device_queue, trans, m, n, alpha, a, lda, b, ldb)
    @ccall liboneapi_support.onemklZomatcopy(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, alpha::Ptr{ComplexF64},
        a::Ptr{ComplexF64}, lda::Int64, b::Ptr{ComplexF64}, ldb::Int64)::Cint
end

# Scaled matrix addition with optional transposition (c := alpha*op(a) + beta*op(b)).
function onemklSomatadd(device_queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb,
                        c, ldc)
    @ccall liboneapi_support.onemklSomatadd(device_queue::syclQueue_t,
        transa::onemklTranspose, transb::onemklTranspose, m::Int64, n::Int64,
        alpha::Ptr{Cfloat}, a::Ptr{Cfloat}, lda::Int64, beta::Ptr{Cfloat},
        b::Ptr{Cfloat}, ldb::Int64, c::Ptr{Cfloat}, ldc::Int64)::Cint
end

function onemklDomatadd(device_queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb,
                        c, ldc)
    @ccall liboneapi_support.onemklDomatadd(device_queue::syclQueue_t,
        transa::onemklTranspose, transb::onemklTranspose, m::Int64, n::Int64,
        alpha::Ptr{Cdouble}, a::Ptr{Cdouble}, lda::Int64, beta::Ptr{Cdouble},
        b::Ptr{Cdouble}, ldb::Int64, c::Ptr{Cdouble}, ldc::Int64)::Cint
end

function onemklComatadd(device_queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb,
                        c, ldc)
    @ccall liboneapi_support.onemklComatadd(device_queue::syclQueue_t,
        transa::onemklTranspose, transb::onemklTranspose, m::Int64, n::Int64,
        alpha::Ptr{ComplexF32}, a::Ptr{ComplexF32}, lda::Int64, beta::Ptr{ComplexF32},
        b::Ptr{ComplexF32}, ldb::Int64, c::Ptr{ComplexF32}, ldc::Int64)::Cint
end

# FIX: Z variant must marshal ComplexF64 (was ComplexF32).
function onemklZomatadd(device_queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb,
                        c, ldc)
    @ccall liboneapi_support.onemklZomatadd(device_queue::syclQueue_t,
        transa::onemklTranspose, transb::onemklTranspose, m::Int64, n::Int64,
        alpha::Ptr{ComplexF64}, a::Ptr{ComplexF64}, lda::Int64, beta::Ptr{ComplexF64},
        b::Ptr{ComplexF64}, ldb::Int64, c::Ptr{ComplexF64}, ldc::Int64)::Cint
end

# Strided batched in-place scaled matrix copy / transposition.
function onemklSimatcopy_batch_strided(device_queue, trans, m, n, alpha, ab, lda, ldb,
                                       stride, batch_size)
    @ccall liboneapi_support.onemklSimatcopy_batch_strided(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, alpha::Ptr{Cfloat}, ab::Ptr{Cfloat},
        lda::Int64, ldb::Int64, stride::Int64, batch_size::Int64)::Cint
end

function onemklDimatcopy_batch_strided(device_queue, trans, m, n, alpha, ab, lda, ldb,
                                       stride, batch_size)
    @ccall liboneapi_support.onemklDimatcopy_batch_strided(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, alpha::Ptr{Cdouble}, ab::Ptr{Cdouble},
        lda::Int64, ldb::Int64, stride::Int64, batch_size::Int64)::Cint
end

function onemklCimatcopy_batch_strided(device_queue, trans, m, n, alpha, ab, lda, ldb,
                                       stride, batch_size)
    @ccall liboneapi_support.onemklCimatcopy_batch_strided(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, alpha::Ptr{ComplexF32},
        ab::Ptr{ComplexF32}, lda::Int64, ldb::Int64, stride::Int64,
        batch_size::Int64)::Cint
end

# FIX: Z (double-precision complex) variant must marshal ComplexF64 (was ComplexF32),
# consistent with the other onemklZ* wrappers in this file.
function onemklZimatcopy_batch_strided(device_queue, trans, m, n, alpha, ab, lda, ldb,
                                       stride, batch_size)
    @ccall liboneapi_support.onemklZimatcopy_batch_strided(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, alpha::Ptr{ComplexF64},
        ab::Ptr{ComplexF64}, lda::Int64, ldb::Int64, stride::Int64,
        batch_size::Int64)::Cint
end

# Strided batched out-of-place scaled matrix copy / transposition.
function onemklSomatcopy_batch_strided(device_queue, trans, m, n, alpha, a, lda, stride_a,
                                       b, ldb, stride_b, batch_size)
    @ccall liboneapi_support.onemklSomatcopy_batch_strided(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, alpha::Ptr{Cfloat}, a::Ptr{Cfloat},
        lda::Int64, stride_a::Int64, b::Ptr{Cfloat}, ldb::Int64, stride_b::Int64,
        batch_size::Int64)::Cint
end

function onemklDomatcopy_batch_strided(device_queue, trans, m, n, alpha, a, lda, stride_a,
                                       b, ldb, stride_b, batch_size)
    @ccall liboneapi_support.onemklDomatcopy_batch_strided(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, alpha::Ptr{Cdouble}, a::Ptr{Cdouble},
        lda::Int64, stride_a::Int64, b::Ptr{Cdouble}, ldb::Int64, stride_b::Int64,
        batch_size::Int64)::Cint
end

function onemklComatcopy_batch_strided(device_queue, trans, m, n, alpha, a, lda, stride_a,
                                       b, ldb, stride_b, batch_size)
    @ccall liboneapi_support.onemklComatcopy_batch_strided(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, alpha::Ptr{ComplexF32},
        a::Ptr{ComplexF32}, lda::Int64, stride_a::Int64, b::Ptr{ComplexF32}, ldb::Int64,
        stride_b::Int64, batch_size::Int64)::Cint
end

# FIX: Z variant must marshal ComplexF64 (was ComplexF32).
function onemklZomatcopy_batch_strided(device_queue, trans, m, n, alpha, a, lda, stride_a,
                                       b, ldb, stride_b, batch_size)
    @ccall liboneapi_support.onemklZomatcopy_batch_strided(device_queue::syclQueue_t,
        trans::onemklTranspose, m::Int64, n::Int64, alpha::Ptr{ComplexF64},
        a::Ptr{ComplexF64}, lda::Int64, stride_a::Int64, b::Ptr{ComplexF64}, ldb::Int64,
        stride_b::Int64, batch_size::Int64)::Cint
end

# Strided batched scaled matrix addition with optional transposition.
function onemklSomatadd_batch_strided(device_queue, transa, transb, m, n, alpha, a, lda,
                                      stride_a, beta, b, ldb, stride_b, c, ldc, stride_c,
                                      batch_size)
    @ccall liboneapi_support.onemklSomatadd_batch_strided(device_queue::syclQueue_t,
        transa::onemklTranspose, transb::onemklTranspose, m::Int64, n::Int64,
        alpha::Ptr{Cfloat}, a::Ptr{Cfloat}, lda::Int64, stride_a::Int64,
        beta::Ptr{Cfloat}, b::Ptr{Cfloat}, ldb::Int64, stride_b::Int64, c::Ptr{Cfloat},
        ldc::Int64, stride_c::Int64, batch_size::Int64)::Cint
end

function onemklDomatadd_batch_strided(device_queue, transa, transb, m, n, alpha, a, lda,
                                      stride_a, beta, b, ldb, stride_b, c, ldc, stride_c,
                                      batch_size)
    @ccall liboneapi_support.onemklDomatadd_batch_strided(device_queue::syclQueue_t,
        transa::onemklTranspose, transb::onemklTranspose, m::Int64, n::Int64,
        alpha::Ptr{Cdouble}, a::Ptr{Cdouble}, lda::Int64, stride_a::Int64,
        beta::Ptr{Cdouble}, b::Ptr{Cdouble}, ldb::Int64, stride_b::Int64, c::Ptr{Cdouble},
        ldc::Int64, stride_c::Int64, batch_size::Int64)::Cint
end

function onemklComatadd_batch_strided(device_queue, transa, transb, m, n, alpha, a, lda,
                                      stride_a, beta, b, ldb, stride_b, c, ldc, stride_c,
                                      batch_size)
    @ccall liboneapi_support.onemklComatadd_batch_strided(device_queue::syclQueue_t,
        transa::onemklTranspose, transb::onemklTranspose, m::Int64, n::Int64,
        alpha::Ptr{ComplexF32}, a::Ptr{ComplexF32}, lda::Int64, stride_a::Int64,
        beta::Ptr{ComplexF32}, b::Ptr{ComplexF32}, ldb::Int64, stride_b::Int64,
        c::Ptr{ComplexF32}, ldc::Int64, stride_c::Int64, batch_size::Int64)::Cint
end

# FIX: Z variant must marshal ComplexF64 (was ComplexF32).
function onemklZomatadd_batch_strided(device_queue, transa, transb, m, n, alpha, a, lda,
                                      stride_a, beta, b, ldb, stride_b, c, ldc, stride_c,
                                      batch_size)
    @ccall liboneapi_support.onemklZomatadd_batch_strided(device_queue::syclQueue_t,
        transa::onemklTranspose, transb::onemklTranspose, m::Int64, n::Int64,
        alpha::Ptr{ComplexF64}, a::Ptr{ComplexF64}, lda::Int64, stride_a::Int64,
        beta::Ptr{ComplexF64}, b::Ptr{ComplexF64}, ldb::Int64, stride_b::Int64,
        c::Ptr{ComplexF64}, ldc::Int64, stride_c::Int64, batch_size::Int64)::Cint
end

# Cholesky factorization (POTRF); `scratchpad` is device workspace of
# `scratchpad_size` elements.
function onemklSpotrf(device_queue, uplo, n, a, lda, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSpotrf(device_queue::syclQueue_t, uplo::onemklUplo,
        n::Int64, a::ZePtr{Cfloat}, lda::Int64, scratchpad::ZePtr{Cfloat},
        scratchpad_size::Int64)::Cint
end

function onemklDpotrf(device_queue, uplo, n, a, lda, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDpotrf(device_queue::syclQueue_t, uplo::onemklUplo,
        n::Int64, a::ZePtr{Cdouble}, lda::Int64, scratchpad::ZePtr{Cdouble},
        scratchpad_size::Int64)::Cint
end

function onemklCpotrf(device_queue, uplo, n, a, lda, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCpotrf(device_queue::syclQueue_t, uplo::onemklUplo,
        n::Int64, a::ZePtr{ComplexF32}, lda::Int64, scratchpad::ZePtr{ComplexF32},
        scratchpad_size::Int64)::Cint
end

function onemklZpotrf(device_queue, uplo, n, a, lda, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZpotrf(device_queue::syclQueue_t,
uplo::onemklUplo, n::Int64, a::ZePtr{ComplexF64}, lda::Int64,
        scratchpad::ZePtr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# Cholesky solve (POTRS): solve A*X = B using a factorization computed by potrf.
function onemklSpotrs(device_queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklSpotrs(device_queue::syclQueue_t, uplo::onemklUplo,
        n::Int64, nrhs::Int64, a::ZePtr{Cfloat}, lda::Int64, b::ZePtr{Cfloat},
        ldb::Int64, scratchpad::ZePtr{Cfloat}, scratchpad_size::Int64)::Cint
end

function onemklDpotrs(device_queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklDpotrs(device_queue::syclQueue_t, uplo::onemklUplo,
        n::Int64, nrhs::Int64, a::ZePtr{Cdouble}, lda::Int64, b::ZePtr{Cdouble},
        ldb::Int64, scratchpad::ZePtr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklCpotrs(device_queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklCpotrs(device_queue::syclQueue_t, uplo::onemklUplo,
        n::Int64, nrhs::Int64, a::ZePtr{ComplexF32}, lda::Int64, b::ZePtr{ComplexF32},
        ldb::Int64, scratchpad::ZePtr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZpotrs(device_queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklZpotrs(device_queue::syclQueue_t, uplo::onemklUplo,
        n::Int64, nrhs::Int64, a::ZePtr{ComplexF64}, lda::Int64, b::ZePtr{ComplexF64},
        ldb::Int64, scratchpad::ZePtr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# Inverse from Cholesky factorization (POTRI).
function onemklSpotri(device_queue, uplo, n, a, lda, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSpotri(device_queue::syclQueue_t, uplo::onemklUplo,
        n::Int64, a::ZePtr{Cfloat}, lda::Int64, scratchpad::ZePtr{Cfloat},
        scratchpad_size::Int64)::Cint
end

function onemklDpotri(device_queue, uplo, n, a, lda, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDpotri(device_queue::syclQueue_t, uplo::onemklUplo,
        n::Int64, a::ZePtr{Cdouble}, lda::Int64, scratchpad::ZePtr{Cdouble},
        scratchpad_size::Int64)::Cint
end

function onemklCpotri(device_queue, uplo, n, a, lda, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCpotri(device_queue::syclQueue_t, uplo::onemklUplo,
        n::Int64, a::ZePtr{ComplexF32}, lda::Int64, scratchpad::ZePtr{ComplexF32},
        scratchpad_size::Int64)::Cint
end

function onemklZpotri(device_queue, uplo, n, a, lda, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZpotri(device_queue::syclQueue_t, uplo::onemklUplo,
        n::Int64, a::ZePtr{ComplexF64}, lda::Int64, scratchpad::ZePtr{ComplexF64},
        scratchpad_size::Int64)::Cint
end

# Triangular matrix inverse (TRTRI).
function onemklStrtri(device_queue, uplo, diag, n, a, lda, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklStrtri(device_queue::syclQueue_t, uplo::onemklUplo,
        diag::onemklDiag, n::Int64, a::ZePtr{Cfloat}, lda::Int64,
        scratchpad::ZePtr{Cfloat}, scratchpad_size::Int64)::Cint
end

function onemklDtrtri(device_queue, uplo, diag, n, a, lda, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDtrtri(device_queue::syclQueue_t, uplo::onemklUplo,
        diag::onemklDiag, n::Int64, a::ZePtr{Cdouble}, lda::Int64,
        scratchpad::ZePtr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklCtrtri(device_queue, uplo, diag, n, a, lda, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCtrtri(device_queue::syclQueue_t, uplo::onemklUplo,
        diag::onemklDiag, n::Int64, a::ZePtr{ComplexF32}, lda::Int64,
        scratchpad::ZePtr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZtrtri(device_queue, uplo, diag, n, a, lda, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZtrtri(device_queue::syclQueue_t, uplo::onemklUplo,
        diag::onemklDiag, n::Int64, a::ZePtr{ComplexF64}, lda::Int64,
        scratchpad::ZePtr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# General linear solve (GESV).
# NOTE(review): in all four gesv wrappers `ipiv` is marshalled with the matrix element
# type (e.g. ZePtr{Cfloat}) rather than an integer pivot type — presumably this mirrors
# the declaration in deps/src/onemkl.h; verify against the C header.
function onemklSgesv(device_queue, n, nrhs, a, lda, ipiv, b, ldb, scratchpad,
                     scratchpad_size)
    @ccall liboneapi_support.onemklSgesv(device_queue::syclQueue_t, n::Int64, nrhs::Int64,
        a::ZePtr{Cfloat}, lda::Int64, ipiv::ZePtr{Cfloat}, b::ZePtr{Cfloat}, ldb::Int64,
scratchpad::ZePtr{Cfloat}, scratchpad_size::Int64)::Cint
end

function onemklDgesv(device_queue, n, nrhs, a, lda, ipiv, b, ldb, scratchpad,
                     scratchpad_size)
    @ccall liboneapi_support.onemklDgesv(device_queue::syclQueue_t, n::Int64, nrhs::Int64,
        a::ZePtr{Cdouble}, lda::Int64, ipiv::ZePtr{Cdouble}, b::ZePtr{Cdouble},
        ldb::Int64, scratchpad::ZePtr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklCgesv(device_queue, n, nrhs, a, lda, ipiv, b, ldb, scratchpad,
                     scratchpad_size)
    @ccall liboneapi_support.onemklCgesv(device_queue::syclQueue_t, n::Int64, nrhs::Int64,
        a::ZePtr{ComplexF32}, lda::Int64, ipiv::ZePtr{ComplexF32}, b::ZePtr{ComplexF32},
        ldb::Int64, scratchpad::ZePtr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZgesv(device_queue, n, nrhs, a, lda, ipiv, b, ldb, scratchpad,
                     scratchpad_size)
    @ccall liboneapi_support.onemklZgesv(device_queue::syclQueue_t, n::Int64, nrhs::Int64,
        a::ZePtr{ComplexF64}, lda::Int64, ipiv::ZePtr{ComplexF64}, b::ZePtr{ComplexF64},
        ldb::Int64, scratchpad::ZePtr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# Bidiagonal reduction (GEBRD).
# NOTE(review): the complex wrappers type the off-diagonal `e` as a complex pointer,
# while LAPACK's gebrd documents `e` as real — presumably this mirrors the C header
# signature; verify against deps/src/onemkl.h.
function onemklCgebrd(device_queue, m, n, a, lda, d, e, tauq, taup, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklCgebrd(device_queue::syclQueue_t, m::Int64, n::Int64,
        a::ZePtr{ComplexF32}, lda::Int64, d::ZePtr{Float32}, e::ZePtr{ComplexF32},
        tauq::ZePtr{ComplexF32}, taup::ZePtr{ComplexF32}, scratchpad::ZePtr{ComplexF32},
        scratchpad_size::Int64)::Cint
end

function onemklDgebrd(device_queue, m, n, a, lda, d, e, tauq, taup, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklDgebrd(device_queue::syclQueue_t, m::Int64, n::Int64,
        a::ZePtr{Float64}, lda::Int64, d::ZePtr{Float64}, e::ZePtr{Float64},
        tauq::ZePtr{Float64}, taup::ZePtr{Float64}, scratchpad::ZePtr{Float64},
        scratchpad_size::Int64)::Cint
end

function onemklSgebrd(device_queue, m, n, a, lda, d, e, tauq, taup, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklSgebrd(device_queue::syclQueue_t, m::Int64, n::Int64,
        a::ZePtr{Float32}, lda::Int64, d::ZePtr{Float32}, e::ZePtr{Float32},
        tauq::ZePtr{Float32}, taup::ZePtr{Float32}, scratchpad::ZePtr{Float32},
        scratchpad_size::Int64)::Cint
end

function onemklZgebrd(device_queue, m, n, a, lda, d, e, tauq, taup, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklZgebrd(device_queue::syclQueue_t, m::Int64, n::Int64,
        a::ZePtr{ComplexF64}, lda::Int64, d::ZePtr{Float64}, e::ZePtr{ComplexF64},
        tauq::ZePtr{ComplexF64}, taup::ZePtr{ComplexF64}, scratchpad::ZePtr{ComplexF64},
        scratchpad_size::Int64)::Cint
end

# QR factorization (GEQRF).
function onemklCgeqrf(device_queue, m, n, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCgeqrf(device_queue::syclQueue_t, m::Int64, n::Int64,
        a::ZePtr{ComplexF32}, lda::Int64, tau::ZePtr{ComplexF32},
        scratchpad::ZePtr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklDgeqrf(device_queue, m, n, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDgeqrf(device_queue::syclQueue_t, m::Int64, n::Int64,
        a::ZePtr{Cdouble}, lda::Int64, tau::ZePtr{Cdouble}, scratchpad::ZePtr{Cdouble},
        scratchpad_size::Int64)::Cint
end

function onemklSgeqrf(device_queue, m, n, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSgeqrf(device_queue::syclQueue_t, m::Int64, n::Int64,
        a::ZePtr{Cfloat}, lda::Int64, tau::ZePtr{Cfloat}, scratchpad::ZePtr{Cfloat},
        scratchpad_size::Int64)::Cint
end

function onemklZgeqrf(device_queue, m, n, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZgeqrf(device_queue::syclQueue_t, m::Int64, n::Int64,
        a::ZePtr{ComplexF64}, lda::Int64, tau::ZePtr{ComplexF64},
        scratchpad::ZePtr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# Singular value decomposition (GESVD); singular values `s` are always real.
function onemklCgesvd(device_queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,
                      scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCgesvd(device_queue::syclQueue_t, jobu::onemklJobsvd,
        jobvt::onemklJobsvd, m::Int64, n::Int64, a::ZePtr{ComplexF32}, lda::Int64,
        s::ZePtr{Float32}, u::ZePtr{ComplexF32}, ldu::Int64, vt::ZePtr{ComplexF32},
        ldvt::Int64, scratchpad::ZePtr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZgesvd(device_queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,
                      scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZgesvd(device_queue::syclQueue_t, jobu::onemklJobsvd,
        jobvt::onemklJobsvd, m::Int64, n::Int64, a::ZePtr{ComplexF64}, lda::Int64,
        s::ZePtr{Float64}, u::ZePtr{ComplexF64}, ldu::Int64, vt::ZePtr{ComplexF64},
        ldvt::Int64, scratchpad::ZePtr{ComplexF64}, scratchpad_size::Int64)::Cint
end

function onemklDgesvd(device_queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,
                      scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDgesvd(device_queue::syclQueue_t, jobu::onemklJobsvd,
        jobvt::onemklJobsvd, m::Int64, n::Int64, a::ZePtr{Float64}, lda::Int64,
        s::ZePtr{Float64}, u::ZePtr{Float64}, ldu::Int64, vt::ZePtr{Float64},
        ldvt::Int64, scratchpad::ZePtr{Float64}, scratchpad_size::Int64)::Cint
end

function onemklSgesvd(device_queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,
                      scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSgesvd(device_queue::syclQueue_t, jobu::onemklJobsvd,
        jobvt::onemklJobsvd, m::Int64, n::Int64, a::ZePtr{Float32}, lda::Int64,
        s::ZePtr{Float32}, u::ZePtr{Float32}, ldu::Int64, vt::ZePtr{Float32},
        ldvt::Int64, scratchpad::ZePtr{Float32}, scratchpad_size::Int64)::Cint
end

# Strided batched approximate SVD (GESVDA).
function onemklCgesvda_batch_strided(device_queue, iparm, irank, m, n, a, lda, stride_a,
                                     s, stride_s, u, ldu, stride_u, vt, ldvt, stride_vt,
                                     tolerance, residual, batch_size, scratchpad,
                                     scratchpad_size)
    @ccall liboneapi_support.onemklCgesvda_batch_strided(device_queue::syclQueue_t,
        iparm::Ptr{Int64}, irank::Ptr{Int64}, m::Int64, n::Int64, a::Ptr{ComplexF32},
        lda::Int64, stride_a::Int64, s::Ptr{Cfloat}, stride_s::Int64, u::Ptr{ComplexF32},
        ldu::Int64, stride_u::Int64, vt::Ptr{ComplexF32}, ldvt::Int64, stride_vt::Int64,
        tolerance::Ptr{Cfloat}, residual::Ptr{Cfloat}, batch_size::Int64,
        scratchpad::Ptr{ComplexF32},
scratchpad_size::Int64)::Cint
end

function onemklDgesvda_batch_strided(device_queue, iparm, irank, m, n, a, lda, stride_a,
                                     s, stride_s, u, ldu, stride_u, vt, ldvt, stride_vt,
                                     tolerance, residual, batch_size, scratchpad,
                                     scratchpad_size)
    @ccall liboneapi_support.onemklDgesvda_batch_strided(device_queue::syclQueue_t,
        iparm::Ptr{Int64}, irank::Ptr{Int64}, m::Int64, n::Int64, a::Ptr{Cdouble},
        lda::Int64, stride_a::Int64, s::Ptr{Cdouble}, stride_s::Int64, u::Ptr{Cdouble},
        ldu::Int64, stride_u::Int64, vt::Ptr{Cdouble}, ldvt::Int64, stride_vt::Int64,
        tolerance::Ptr{Cdouble}, residual::Ptr{Cdouble}, batch_size::Int64,
        scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklSgesvda_batch_strided(device_queue, iparm, irank, m, n, a, lda, stride_a,
                                     s, stride_s, u, ldu, stride_u, vt, ldvt, stride_vt,
                                     tolerance, residual, batch_size, scratchpad,
                                     scratchpad_size)
    @ccall liboneapi_support.onemklSgesvda_batch_strided(device_queue::syclQueue_t,
        iparm::Ptr{Int64}, irank::Ptr{Int64}, m::Int64, n::Int64, a::Ptr{Cfloat},
        lda::Int64, stride_a::Int64, s::Ptr{Cfloat}, stride_s::Int64, u::Ptr{Cfloat},
        ldu::Int64, stride_u::Int64, vt::Ptr{Cfloat}, ldvt::Int64, stride_vt::Int64,
        tolerance::Ptr{Cfloat}, residual::Ptr{Cfloat}, batch_size::Int64,
        scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint
end

# FIX: the Z variant previously marshalled its complex arrays as Ptr{ComplexF32}; it must
# use ComplexF64 — the real outputs (s, tolerance, residual) are already Cdouble, which
# confirms the double-precision intent.
function onemklZgesvda_batch_strided(device_queue, iparm, irank, m, n, a, lda, stride_a,
                                     s, stride_s, u, ldu, stride_u, vt, ldvt, stride_vt,
                                     tolerance, residual, batch_size, scratchpad,
                                     scratchpad_size)
    @ccall liboneapi_support.onemklZgesvda_batch_strided(device_queue::syclQueue_t,
        iparm::Ptr{Int64}, irank::Ptr{Int64}, m::Int64, n::Int64, a::Ptr{ComplexF64},
        lda::Int64, stride_a::Int64, s::Ptr{Cdouble}, stride_s::Int64,
        u::Ptr{ComplexF64}, ldu::Int64, stride_u::Int64, vt::Ptr{ComplexF64},
        ldvt::Int64, stride_vt::Int64, tolerance::Ptr{Cdouble}, residual::Ptr{Cdouble},
        batch_size::Int64, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# LU factorization with partial pivoting (GETRF); pivot indices in `ipiv`.
function onemklCgetrf(device_queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCgetrf(device_queue::syclQueue_t, m::Int64, n::Int64,
        a::ZePtr{ComplexF32}, lda::Int64, ipiv::ZePtr{Int64},
        scratchpad::ZePtr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklDgetrf(device_queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDgetrf(device_queue::syclQueue_t, m::Int64, n::Int64,
        a::ZePtr{Cdouble}, lda::Int64, ipiv::ZePtr{Int64}, scratchpad::ZePtr{Cdouble},
        scratchpad_size::Int64)::Cint
end

function onemklSgetrf(device_queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSgetrf(device_queue::syclQueue_t, m::Int64, n::Int64,
        a::ZePtr{Cfloat}, lda::Int64, ipiv::ZePtr{Int64}, scratchpad::ZePtr{Cfloat},
        scratchpad_size::Int64)::Cint
end

function onemklZgetrf(device_queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZgetrf(device_queue::syclQueue_t, m::Int64, n::Int64,
        a::ZePtr{ComplexF64}, lda::Int64, ipiv::ZePtr{Int64},
        scratchpad::ZePtr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# Grouped batched LU factorization (pointer-array API).
function onemklCgetrf_batch(device_queue, m, n, a, lda, ipiv, group_count, group_sizes,
                            scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCgetrf_batch(device_queue::syclQueue_t, m::Ptr{Int64},
        n::Ptr{Int64}, a::ZePtr{Ptr{ComplexF32}}, lda::Ptr{Int64},
        ipiv::ZePtr{Ptr{Int64}}, group_count::Int64, group_sizes::Ptr{Int64},
        scratchpad::ZePtr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklDgetrf_batch(device_queue, m, n, a, lda, ipiv, group_count, group_sizes,
                            scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDgetrf_batch(device_queue::syclQueue_t, m::Ptr{Int64},
        n::Ptr{Int64}, a::ZePtr{Ptr{Cdouble}}, lda::Ptr{Int64}, ipiv::ZePtr{Ptr{Int64}},
        group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{Cdouble},
        scratchpad_size::Int64)::Cint
end

function onemklSgetrf_batch(device_queue, m, n, a, lda, ipiv, group_count,
group_sizes, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSgetrf_batch(device_queue::syclQueue_t, m::Ptr{Int64},
        n::Ptr{Int64}, a::ZePtr{Ptr{Cfloat}}, lda::Ptr{Int64}, ipiv::ZePtr{Ptr{Int64}},
        group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{Cfloat},
        scratchpad_size::Int64)::Cint
end

function onemklZgetrf_batch(device_queue, m, n, a, lda, ipiv, group_count, group_sizes,
                            scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZgetrf_batch(device_queue::syclQueue_t, m::Ptr{Int64},
        n::Ptr{Int64}, a::ZePtr{Ptr{ComplexF64}}, lda::Ptr{Int64},
        ipiv::ZePtr{Ptr{Int64}}, group_count::Int64, group_sizes::Ptr{Int64},
        scratchpad::ZePtr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# Strided batched LU factorization with partial pivoting.
function onemklCgetrf_batch_strided(device_queue, m, n, a, lda, stride_a, ipiv,
                                    stride_ipiv, batch_size, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCgetrf_batch_strided(device_queue::syclQueue_t,
        m::Int64, n::Int64, a::Ptr{ComplexF32}, lda::Int64, stride_a::Int64,
        ipiv::Ptr{Int64}, stride_ipiv::Int64, batch_size::Int64,
        scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklDgetrf_batch_strided(device_queue, m, n, a, lda, stride_a, ipiv,
                                    stride_ipiv, batch_size, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDgetrf_batch_strided(device_queue::syclQueue_t,
        m::Int64, n::Int64, a::Ptr{Cdouble}, lda::Int64, stride_a::Int64,
        ipiv::Ptr{Int64}, stride_ipiv::Int64, batch_size::Int64, scratchpad::Ptr{Cdouble},
        scratchpad_size::Int64)::Cint
end

function onemklSgetrf_batch_strided(device_queue, m, n, a, lda, stride_a, ipiv,
                                    stride_ipiv, batch_size, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSgetrf_batch_strided(device_queue::syclQueue_t,
        m::Int64, n::Int64, a::Ptr{Cfloat}, lda::Int64, stride_a::Int64,
        ipiv::Ptr{Int64}, stride_ipiv::Int64, batch_size::Int64, scratchpad::Ptr{Cfloat},
        scratchpad_size::Int64)::Cint
end

# FIX: Z (double-precision complex) variant must marshal ComplexF64 (was ComplexF32),
# consistent with onemklZgetrf above.
function onemklZgetrf_batch_strided(device_queue, m, n, a, lda, stride_a, ipiv,
                                    stride_ipiv, batch_size, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZgetrf_batch_strided(device_queue::syclQueue_t,
        m::Int64, n::Int64, a::Ptr{ComplexF64}, lda::Int64, stride_a::Int64,
        ipiv::Ptr{Int64}, stride_ipiv::Int64, batch_size::Int64,
        scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# LU factorization without pivoting (GETRFNP).
function onemklCgetrfnp(device_queue, m, n, a, lda, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCgetrfnp(device_queue::syclQueue_t, m::Int64, n::Int64,
        a::Ptr{ComplexF32}, lda::Int64, scratchpad::Ptr{ComplexF32},
        scratchpad_size::Int64)::Cint
end

function onemklDgetrfnp(device_queue, m, n, a, lda, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDgetrfnp(device_queue::syclQueue_t, m::Int64, n::Int64,
        a::Ptr{Cdouble}, lda::Int64, scratchpad::Ptr{Cdouble},
        scratchpad_size::Int64)::Cint
end

function onemklSgetrfnp(device_queue, m, n, a, lda, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSgetrfnp(device_queue::syclQueue_t, m::Int64, n::Int64,
        a::Ptr{Cfloat}, lda::Int64, scratchpad::Ptr{Cfloat},
        scratchpad_size::Int64)::Cint
end

# FIX: Z variant must marshal ComplexF64 (was ComplexF32).
function onemklZgetrfnp(device_queue, m, n, a, lda, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZgetrfnp(device_queue::syclQueue_t, m::Int64, n::Int64,
        a::Ptr{ComplexF64}, lda::Int64, scratchpad::Ptr{ComplexF64},
        scratchpad_size::Int64)::Cint
end

# Grouped batched LU factorization without pivoting.
function onemklCgetrfnp_batch(device_queue, m, n, a, lda, group_count, group_sizes,
                              scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCgetrfnp_batch(device_queue::syclQueue_t,
        m::Ptr{Int64}, n::Ptr{Int64}, a::Ptr{Ptr{ComplexF32}}, lda::Ptr{Int64},
        group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::Ptr{ComplexF32},
        scratchpad_size::Int64)::Cint
end

function onemklDgetrfnp_batch(device_queue, m, n, a, lda, group_count, group_sizes,
                              scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDgetrfnp_batch(device_queue::syclQueue_t,
        m::Ptr{Int64}, n::Ptr{Int64}, a::Ptr{Ptr{Cdouble}}, lda::Ptr{Int64},
        group_count::Int64,
group_sizes::Ptr{Int64}, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklSgetrfnp_batch(device_queue, m, n, a, lda, group_count, group_sizes,
                              scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSgetrfnp_batch(device_queue::syclQueue_t,
        m::Ptr{Int64}, n::Ptr{Int64}, a::Ptr{Ptr{Cfloat}}, lda::Ptr{Int64},
        group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::Ptr{Cfloat},
        scratchpad_size::Int64)::Cint
end

# FIX: Z (double-precision complex) variant must marshal ComplexF64 (was ComplexF32),
# consistent with the other onemklZ* wrappers in this file.
function onemklZgetrfnp_batch(device_queue, m, n, a, lda, group_count, group_sizes,
                              scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZgetrfnp_batch(device_queue::syclQueue_t,
        m::Ptr{Int64}, n::Ptr{Int64}, a::Ptr{Ptr{ComplexF64}}, lda::Ptr{Int64},
        group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::Ptr{ComplexF64},
        scratchpad_size::Int64)::Cint
end

# Strided batched LU factorization without pivoting.
function onemklCgetrfnp_batch_strided(device_queue, m, n, a, lda, stride_a, batch_size,
                                      scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCgetrfnp_batch_strided(device_queue::syclQueue_t,
        m::Int64, n::Int64, a::Ptr{ComplexF32}, lda::Int64, stride_a::Int64,
        batch_size::Int64, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklDgetrfnp_batch_strided(device_queue, m, n, a, lda, stride_a, batch_size,
                                      scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDgetrfnp_batch_strided(device_queue::syclQueue_t,
        m::Int64, n::Int64, a::Ptr{Cdouble}, lda::Int64, stride_a::Int64,
        batch_size::Int64, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklSgetrfnp_batch_strided(device_queue, m, n, a, lda, stride_a, batch_size,
                                      scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSgetrfnp_batch_strided(device_queue::syclQueue_t,
        m::Int64, n::Int64, a::Ptr{Cfloat}, lda::Int64, stride_a::Int64,
        batch_size::Int64, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint
end

# FIX: Z variant must marshal ComplexF64 (was ComplexF32).
function onemklZgetrfnp_batch_strided(device_queue, m, n, a, lda, stride_a, batch_size,
                                      scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZgetrfnp_batch_strided(device_queue::syclQueue_t,
        m::Int64, n::Int64, a::Ptr{ComplexF64}, lda::Int64, stride_a::Int64,
        batch_size::Int64, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# Matrix inverse from LU factorization (GETRI).
function onemklCgetri(device_queue, n, a, lda, ipiv, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCgetri(device_queue::syclQueue_t, n::Int64,
        a::ZePtr{ComplexF32}, lda::Int64, ipiv::ZePtr{Int64},
        scratchpad::ZePtr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklDgetri(device_queue, n, a, lda, ipiv, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDgetri(device_queue::syclQueue_t, n::Int64,
        a::ZePtr{Cdouble}, lda::Int64, ipiv::ZePtr{Int64}, scratchpad::ZePtr{Cdouble},
        scratchpad_size::Int64)::Cint
end

function onemklSgetri(device_queue, n, a, lda, ipiv, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSgetri(device_queue::syclQueue_t, n::Int64,
        a::ZePtr{Cfloat}, lda::Int64, ipiv::ZePtr{Int64}, scratchpad::ZePtr{Cfloat},
        scratchpad_size::Int64)::Cint
end

function onemklZgetri(device_queue, n, a, lda, ipiv, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZgetri(device_queue::syclQueue_t, n::Int64,
        a::ZePtr{ComplexF64}, lda::Int64, ipiv::ZePtr{Int64},
        scratchpad::ZePtr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# Linear solve from LU factorization (GETRS).
function onemklCgetrs(device_queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklCgetrs(device_queue::syclQueue_t,
        trans::onemklTranspose, n::Int64, nrhs::Int64, a::ZePtr{ComplexF32}, lda::Int64,
        ipiv::ZePtr{Int64}, b::ZePtr{ComplexF32}, ldb::Int64,
        scratchpad::ZePtr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklDgetrs(device_queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklDgetrs(device_queue::syclQueue_t,
        trans::onemklTranspose, n::Int64, nrhs::Int64, a::ZePtr{Cdouble}, lda::Int64,
        ipiv::ZePtr{Int64}, b::ZePtr{Cdouble}, ldb::Int64,
scratchpad::ZePtr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklSgetrs(device_queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklSgetrs(device_queue::syclQueue_t,
        trans::onemklTranspose, n::Int64, nrhs::Int64, a::ZePtr{Cfloat}, lda::Int64,
        ipiv::ZePtr{Int64}, b::ZePtr{Cfloat}, ldb::Int64, scratchpad::ZePtr{Cfloat},
        scratchpad_size::Int64)::Cint
end

function onemklZgetrs(device_queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad,
                      scratchpad_size)
    @ccall liboneapi_support.onemklZgetrs(device_queue::syclQueue_t,
        trans::onemklTranspose, n::Int64, nrhs::Int64, a::ZePtr{ComplexF64}, lda::Int64,
        ipiv::ZePtr{Int64}, b::ZePtr{ComplexF64}, ldb::Int64,
        scratchpad::ZePtr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# Strided batched linear solve from LU factorization.
function onemklCgetrs_batch_strided(device_queue, trans, n, nrhs, a, lda, stride_a, ipiv,
                                    stride_ipiv, b, ldb, stride_b, batch_size, scratchpad,
                                    scratchpad_size)
    @ccall liboneapi_support.onemklCgetrs_batch_strided(device_queue::syclQueue_t,
        trans::onemklTranspose, n::Int64, nrhs::Int64, a::Ptr{ComplexF32}, lda::Int64,
        stride_a::Int64, ipiv::Ptr{Int64}, stride_ipiv::Int64, b::Ptr{ComplexF32},
        ldb::Int64, stride_b::Int64, batch_size::Int64, scratchpad::Ptr{ComplexF32},
        scratchpad_size::Int64)::Cint
end

function onemklDgetrs_batch_strided(device_queue, trans, n, nrhs, a, lda, stride_a, ipiv,
                                    stride_ipiv, b, ldb, stride_b, batch_size, scratchpad,
                                    scratchpad_size)
    @ccall liboneapi_support.onemklDgetrs_batch_strided(device_queue::syclQueue_t,
        trans::onemklTranspose, n::Int64, nrhs::Int64, a::Ptr{Cdouble}, lda::Int64,
        stride_a::Int64, ipiv::Ptr{Int64}, stride_ipiv::Int64, b::Ptr{Cdouble},
        ldb::Int64, stride_b::Int64, batch_size::Int64, scratchpad::Ptr{Cdouble},
        scratchpad_size::Int64)::Cint
end

function onemklSgetrs_batch_strided(device_queue, trans, n, nrhs, a, lda, stride_a, ipiv,
                                    stride_ipiv, b, ldb, stride_b, batch_size, scratchpad,
                                    scratchpad_size)
    @ccall liboneapi_support.onemklSgetrs_batch_strided(device_queue::syclQueue_t,
        trans::onemklTranspose, n::Int64, nrhs::Int64, a::Ptr{Cfloat}, lda::Int64,
        stride_a::Int64, ipiv::Ptr{Int64}, stride_ipiv::Int64, b::Ptr{Cfloat},
        ldb::Int64, stride_b::Int64, batch_size::Int64, scratchpad::Ptr{Cfloat},
        scratchpad_size::Int64)::Cint
end

# FIX: Z (double-precision complex) variant must marshal ComplexF64 (was ComplexF32),
# consistent with onemklZgetrs above.
function onemklZgetrs_batch_strided(device_queue, trans, n, nrhs, a, lda, stride_a, ipiv,
                                    stride_ipiv, b, ldb, stride_b, batch_size, scratchpad,
                                    scratchpad_size)
    @ccall liboneapi_support.onemklZgetrs_batch_strided(device_queue::syclQueue_t,
        trans::onemklTranspose, n::Int64, nrhs::Int64, a::Ptr{ComplexF64}, lda::Int64,
        stride_a::Int64, ipiv::Ptr{Int64}, stride_ipiv::Int64, b::Ptr{ComplexF64},
        ldb::Int64, stride_b::Int64, batch_size::Int64, scratchpad::Ptr{ComplexF64},
        scratchpad_size::Int64)::Cint
end

# Strided batched linear solve from a non-pivoting LU factorization (GETRSNP).
function onemklCgetrsnp_batch_strided(device_queue, trans, n, nrhs, a, lda, stride_a, b,
                                      ldb, stride_b, batch_size, scratchpad,
                                      scratchpad_size)
    @ccall liboneapi_support.onemklCgetrsnp_batch_strided(device_queue::syclQueue_t,
        trans::onemklTranspose, n::Int64, nrhs::Int64, a::Ptr{ComplexF32}, lda::Int64,
        stride_a::Int64, b::Ptr{ComplexF32}, ldb::Int64, stride_b::Int64,
        batch_size::Int64, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklDgetrsnp_batch_strided(device_queue, trans, n, nrhs, a, lda, stride_a, b,
                                      ldb, stride_b, batch_size, scratchpad,
                                      scratchpad_size)
    @ccall liboneapi_support.onemklDgetrsnp_batch_strided(device_queue::syclQueue_t,
        trans::onemklTranspose, n::Int64, nrhs::Int64, a::Ptr{Cdouble}, lda::Int64,
        stride_a::Int64, b::Ptr{Cdouble}, ldb::Int64, stride_b::Int64, batch_size::Int64,
        scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklSgetrsnp_batch_strided(device_queue, trans, n, nrhs, a, lda, stride_a, b,
                                      ldb, stride_b, batch_size, scratchpad,
                                      scratchpad_size)
    @ccall liboneapi_support.onemklSgetrsnp_batch_strided(device_queue::syclQueue_t,
        trans::onemklTranspose, n::Int64,
nrhs::Int64, a::Ptr{Cfloat}, lda::Int64, stride_a::Int64, b::Ptr{Cfloat}, ldb::Int64, stride_b::Int64, batch_size::Int64, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint end

# NOTE(review): Z = double-precision complex (LAPACK convention); in this block
# the Z* wrappers had their complex element types corrected from ComplexF32 to
# ComplexF64, matching the sibling Z wrappers (e.g. onemklZheevd, onemklZhegvd).
# The pointer ABI is unchanged; only the Julia-side conversion target differs.
function onemklZgetrsnp_batch_strided(device_queue, trans, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZgetrsnp_batch_strided(device_queue::syclQueue_t, trans::onemklTranspose, n::Int64, nrhs::Int64, a::Ptr{ComplexF64}, lda::Int64, stride_a::Int64, b::Ptr{ComplexF64}, ldb::Int64, stride_b::Int64, batch_size::Int64, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# heev: eigenvalues (and optionally eigenvectors) of a Hermitian matrix.
function onemklCheev(device_queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCheev(device_queue::syclQueue_t, jobz::onemklCompz, uplo::onemklUplo, n::Int64, a::Ptr{ComplexF32}, lda::Int64, w::Ptr{Cfloat}, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZheev(device_queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZheev(device_queue::syclQueue_t, jobz::onemklCompz, uplo::onemklUplo, n::Int64, a::Ptr{ComplexF64}, lda::Int64, w::Ptr{Cdouble}, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# heevd: divide-and-conquer Hermitian eigensolver (device pointers).
function onemklCheevd(device_queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCheevd(device_queue::syclQueue_t, jobz::onemklJob, uplo::onemklUplo, n::Int64, a::ZePtr{ComplexF32}, lda::Int64, w::ZePtr{Float32}, scratchpad::ZePtr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZheevd(device_queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZheevd(device_queue::syclQueue_t, jobz::onemklJob, uplo::onemklUplo, n::Int64, a::ZePtr{ComplexF64}, lda::Int64, w::ZePtr{Float64}, scratchpad::ZePtr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# heevx: selected eigenvalues/eigenvectors of a Hermitian matrix.
function onemklCheevx(device_queue, jobz, range, uplo, n, a, lda, vl, vu, il, iu, abstol, m, w, z, ldz, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCheevx(device_queue::syclQueue_t, jobz::onemklCompz, range::onemklRangev, uplo::onemklUplo, n::Int64, a::Ptr{ComplexF32}, lda::Int64, vl::Ptr{Cfloat}, vu::Ptr{Cfloat}, il::Int64, iu::Int64, abstol::Ptr{Cfloat}, m::Ptr{Int64}, w::Ptr{Cfloat}, z::Ptr{ComplexF32}, ldz::Int64, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZheevx(device_queue, jobz, range, uplo, n, a, lda, vl, vu, il, iu, abstol, m, w, z, ldz, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZheevx(device_queue::syclQueue_t, jobz::onemklCompz, range::onemklRangev, uplo::onemklUplo, n::Int64, a::Ptr{ComplexF64}, lda::Int64, vl::Ptr{Cdouble}, vu::Ptr{Cdouble}, il::Int64, iu::Int64, abstol::Ptr{Cdouble}, m::Ptr{Int64}, w::Ptr{Cdouble}, z::Ptr{ComplexF64}, ldz::Int64, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# hegvd: generalized Hermitian-definite eigenproblem (divide-and-conquer).
function onemklChegvd(device_queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklChegvd(device_queue::syclQueue_t, itype::Int64, jobz::onemklJob, uplo::onemklUplo, n::Int64, a::ZePtr{ComplexF32}, lda::Int64, b::ZePtr{ComplexF32}, ldb::Int64, w::ZePtr{Float32}, scratchpad::ZePtr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZhegvd(device_queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZhegvd(device_queue::syclQueue_t, itype::Int64, jobz::onemklJob, uplo::onemklUplo, n::Int64, a::ZePtr{ComplexF64}, lda::Int64, b::ZePtr{ComplexF64}, ldb::Int64, w::ZePtr{Float64}, scratchpad::ZePtr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# hegvx: selected eigenpairs of a generalized Hermitian-definite problem.
function onemklChegvx(device_queue, itype, jobz, range, uplo, n, a, lda, b, ldb, vl, vu, il, iu, abstol, m, w, z, ldz, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklChegvx(device_queue::syclQueue_t, itype::Int64, jobz::onemklCompz, range::onemklRangev, uplo::onemklUplo, n::Int64, a::Ptr{ComplexF32}, lda::Int64, b::Ptr{ComplexF32}, ldb::Int64, vl::Ptr{Cfloat}, vu::Ptr{Cfloat}, il::Int64, iu::Int64, abstol::Ptr{Cfloat}, m::Ptr{Int64}, w::Ptr{Cfloat}, z::Ptr{ComplexF32}, ldz::Int64, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZhegvx(device_queue, itype, jobz, range, uplo, n, a, lda, b, ldb, vl, vu, il, iu, abstol, m, w, z, ldz, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZhegvx(device_queue::syclQueue_t, itype::Int64, jobz::onemklCompz, range::onemklRangev, uplo::onemklUplo, n::Int64, a::Ptr{ComplexF64}, lda::Int64, b::Ptr{ComplexF64}, ldb::Int64, vl::Ptr{Cdouble}, vu::Ptr{Cdouble}, il::Int64, iu::Int64, abstol::Ptr{Cdouble}, m::Ptr{Int64}, w::Ptr{Cdouble}, z::Ptr{ComplexF64}, ldz::Int64, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# hetrd: reduce a Hermitian matrix to real tridiagonal form.
function onemklChetrd(device_queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklChetrd(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{ComplexF32}, lda::Int64, d::Ptr{Cfloat}, e::Ptr{Cfloat}, tau::Ptr{ComplexF32}, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZhetrd(device_queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZhetrd(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{ComplexF64}, lda::Int64, d::Ptr{Cdouble}, e::Ptr{Cdouble}, tau::Ptr{ComplexF64}, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# hetrf: Bunch-Kaufman factorization of a Hermitian matrix.
function onemklChetrf(device_queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklChetrf(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{ComplexF32}, lda::Int64, ipiv::Ptr{Int64}, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZhetrf(device_queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklZhetrf(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{ComplexF64}, lda::Int64, ipiv::Ptr{Int64},
scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint end

# NOTE(review): the ComplexF64 in the onemklZhetrf tail above and in onemklZsteqr
# below corrects a ComplexF32 element type on Z (double-complex) routines,
# matching the sibling Z wrappers elsewhere in this file.

# orgbr: generate the real orthogonal matrix determined by gebrd.
function onemklSorgbr(device_queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSorgbr(device_queue::syclQueue_t, vec::onemklGenerate, m::Int64, n::Int64, k::Int64, a::Ptr{Cfloat}, lda::Int64, tau::Ptr{Cfloat}, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint
end

function onemklDorgbr(device_queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDorgbr(device_queue::syclQueue_t, vec::onemklGenerate, m::Int64, n::Int64, k::Int64, a::Ptr{Cdouble}, lda::Int64, tau::Ptr{Cdouble}, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint
end

# orgqr: generate the orthogonal matrix Q from a QR factorization.
function onemklDorgqr(device_queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDorgqr(device_queue::syclQueue_t, m::Int64, n::Int64, k::Int64, a::ZePtr{Cdouble}, lda::Int64, tau::ZePtr{Cdouble}, scratchpad::ZePtr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklSorgqr(device_queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSorgqr(device_queue::syclQueue_t, m::Int64, n::Int64, k::Int64, a::ZePtr{Cfloat}, lda::Int64, tau::ZePtr{Cfloat}, scratchpad::ZePtr{Cfloat}, scratchpad_size::Int64)::Cint
end

# ormqr: multiply a matrix by Q from a QR factorization.
function onemklDormqr(device_queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDormqr(device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64, n::Int64, k::Int64, a::ZePtr{Cdouble}, lda::Int64, tau::ZePtr{Cdouble}, c::ZePtr{Cdouble}, ldc::Int64, scratchpad::ZePtr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklSormqr(device_queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSormqr(device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64, n::Int64, k::Int64, a::ZePtr{Cfloat}, lda::Int64, tau::ZePtr{Cfloat}, c::ZePtr{Cfloat}, ldc::Int64, scratchpad::ZePtr{Cfloat}, scratchpad_size::Int64)::Cint
end

# steqr: eigen-decomposition of a symmetric tridiagonal matrix (QR algorithm).
function onemklCsteqr(device_queue, compz, n, d, e, z, ldz, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCsteqr(device_queue::syclQueue_t, compz::onemklCompz, n::Int64, d::Ptr{Cfloat}, e::Ptr{Cfloat}, z::Ptr{ComplexF32}, ldz::Int64, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklDsteqr(device_queue, compz, n, d, e, z, ldz, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDsteqr(device_queue::syclQueue_t, compz::onemklCompz, n::Int64, d::Ptr{Cdouble}, e::Ptr{Cdouble}, z::Ptr{Cdouble}, ldz::Int64, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklSsteqr(device_queue, compz, n, d, e, z, ldz, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSsteqr(device_queue::syclQueue_t, compz::onemklCompz, n::Int64, d::Ptr{Cfloat}, e::Ptr{Cfloat}, z::Ptr{Cfloat}, ldz::Int64, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint
end

function onemklZsteqr(device_queue, compz, n, d, e, z, ldz, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZsteqr(device_queue::syclQueue_t, compz::onemklCompz, n::Int64, d::Ptr{Cdouble}, e::Ptr{Cdouble}, z::Ptr{ComplexF64}, ldz::Int64, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# syev: eigenvalues (and optionally eigenvectors) of a real symmetric matrix.
function onemklDsyev(device_queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDsyev(device_queue::syclQueue_t, jobz::onemklCompz, uplo::onemklUplo, n::Int64, a::Ptr{Cdouble}, lda::Int64, w::Ptr{Cdouble}, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklSsyev(device_queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSsyev(device_queue::syclQueue_t, jobz::onemklCompz, uplo::onemklUplo, n::Int64, a::Ptr{Cfloat}, lda::Int64, w::Ptr{Cfloat}, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint
end

# syevd: divide-and-conquer symmetric eigensolver (device pointers).
function onemklDsyevd(device_queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDsyevd(device_queue::syclQueue_t, jobz::onemklJob, uplo::onemklUplo, n::Int64, a::ZePtr{Cdouble}, lda::Int64, w::ZePtr{Cdouble}, scratchpad::ZePtr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklSsyevd(device_queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSsyevd(device_queue::syclQueue_t, jobz::onemklJob, uplo::onemklUplo, n::Int64, a::ZePtr{Cfloat}, lda::Int64, w::ZePtr{Cfloat}, scratchpad::ZePtr{Cfloat}, scratchpad_size::Int64)::Cint
end

# syevx: selected eigenvalues/eigenvectors of a real symmetric matrix.
function onemklDsyevx(device_queue, jobz, range, uplo, n, a, lda, vl, vu, il, iu, abstol, m, w, z, ldz, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDsyevx(device_queue::syclQueue_t, jobz::onemklCompz, range::onemklRangev, uplo::onemklUplo, n::Int64, a::Ptr{Cdouble}, lda::Int64, vl::Ptr{Cdouble}, vu::Ptr{Cdouble}, il::Int64, iu::Int64, abstol::Ptr{Cdouble}, m::Ptr{Int64}, w::Ptr{Cdouble}, z::Ptr{Cdouble}, ldz::Int64, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklSsyevx(device_queue, jobz, range, uplo, n, a, lda, vl, vu, il, iu, abstol, m, w, z, ldz, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSsyevx(device_queue::syclQueue_t, jobz::onemklCompz, range::onemklRangev, uplo::onemklUplo, n::Int64, a::Ptr{Cfloat}, lda::Int64, vl::Ptr{Cfloat}, vu::Ptr{Cfloat}, il::Int64, iu::Int64, abstol::Ptr{Cfloat}, m::Ptr{Int64}, w::Ptr{Cfloat}, z::Ptr{Cfloat}, ldz::Int64, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint
end

# sygvd: generalized symmetric-definite eigenproblem (divide-and-conquer).
function onemklDsygvd(device_queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDsygvd(device_queue::syclQueue_t, itype::Int64, jobz::onemklJob, uplo::onemklUplo, n::Int64, a::ZePtr{Cdouble}, lda::Int64, b::ZePtr{Cdouble}, ldb::Int64, w::ZePtr{Cdouble}, scratchpad::ZePtr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklSsygvd(device_queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size)
@ccall liboneapi_support.onemklSsygvd(device_queue::syclQueue_t, itype::Int64, jobz::onemklJob, uplo::onemklUplo, n::Int64, a::ZePtr{Cfloat}, lda::Int64, b::ZePtr{Cfloat}, ldb::Int64, w::ZePtr{Cfloat}, scratchpad::ZePtr{Cfloat}, scratchpad_size::Int64)::Cint end

# sygvx: selected eigenpairs of a generalized symmetric-definite problem.
function onemklDsygvx(device_queue, itype, jobz, range, uplo, n, a, lda, b, ldb, vl, vu, il, iu, abstol, m, w, z, ldz, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDsygvx(device_queue::syclQueue_t, itype::Int64, jobz::onemklCompz, range::onemklRangev, uplo::onemklUplo, n::Int64, a::Ptr{Cdouble}, lda::Int64, b::Ptr{Cdouble}, ldb::Int64, vl::Ptr{Cdouble}, vu::Ptr{Cdouble}, il::Int64, iu::Int64, abstol::Ptr{Cdouble}, m::Ptr{Int64}, w::Ptr{Cdouble}, z::Ptr{Cdouble}, ldz::Int64, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklSsygvx(device_queue, itype, jobz, range, uplo, n, a, lda, b, ldb, vl, vu, il, iu, abstol, m, w, z, ldz, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSsygvx(device_queue::syclQueue_t, itype::Int64, jobz::onemklCompz, range::onemklRangev, uplo::onemklUplo, n::Int64, a::Ptr{Cfloat}, lda::Int64, b::Ptr{Cfloat}, ldb::Int64, vl::Ptr{Cfloat}, vu::Ptr{Cfloat}, il::Int64, iu::Int64, abstol::Ptr{Cfloat}, m::Ptr{Int64}, w::Ptr{Cfloat}, z::Ptr{Cfloat}, ldz::Int64, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint
end

# sytrd: reduce a real symmetric matrix to tridiagonal form.
function onemklDsytrd(device_queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDsytrd(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{Cdouble}, lda::Int64, d::Ptr{Cdouble}, e::Ptr{Cdouble}, tau::Ptr{Cdouble}, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklSsytrd(device_queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSsytrd(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{Cfloat}, lda::Int64, d::Ptr{Cfloat}, e::Ptr{Cfloat}, tau::Ptr{Cfloat}, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint
end

# trtrs: solve a triangular system op(A)*X = B.
function onemklCtrtrs(device_queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCtrtrs(device_queue::syclQueue_t, uplo::onemklUplo, trans::onemklTranspose, diag::onemklDiag, n::Int64, nrhs::Int64, a::Ptr{ComplexF32}, lda::Int64, b::Ptr{ComplexF32}, ldb::Int64, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklDtrtrs(device_queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDtrtrs(device_queue::syclQueue_t, uplo::onemklUplo, trans::onemklTranspose, diag::onemklDiag, n::Int64, nrhs::Int64, a::Ptr{Cdouble}, lda::Int64, b::Ptr{Cdouble}, ldb::Int64, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklStrtrs(device_queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklStrtrs(device_queue::syclQueue_t, uplo::onemklUplo, trans::onemklTranspose, diag::onemklDiag, n::Int64, nrhs::Int64, a::Ptr{Cfloat}, lda::Int64, b::Ptr{Cfloat}, ldb::Int64, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint
end

# NOTE(review): Z = double-precision complex; corrected ComplexF32 -> ComplexF64
# in onemklZtrtrs and onemklZungbr to match the sibling Z wrappers
# (e.g. onemklZungqr below). Pointer ABI is unchanged.
function onemklZtrtrs(device_queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZtrtrs(device_queue::syclQueue_t, uplo::onemklUplo, trans::onemklTranspose, diag::onemklDiag, n::Int64, nrhs::Int64, a::Ptr{ComplexF64}, lda::Int64, b::Ptr{ComplexF64}, ldb::Int64, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# ungbr: generate the complex unitary matrix determined by gebrd.
function onemklCungbr(device_queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCungbr(device_queue::syclQueue_t, vec::onemklGenerate, m::Int64, n::Int64, k::Int64, a::Ptr{ComplexF32}, lda::Int64, tau::Ptr{ComplexF32}, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZungbr(device_queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZungbr(device_queue::syclQueue_t, vec::onemklGenerate, m::Int64, n::Int64, k::Int64, a::Ptr{ComplexF64}, lda::Int64, tau::Ptr{ComplexF64}, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# ungqr: generate the unitary matrix Q from a QR factorization.
function onemklCungqr(device_queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCungqr(device_queue::syclQueue_t, m::Int64, n::Int64, k::Int64, a::ZePtr{ComplexF32}, lda::Int64, tau::ZePtr{ComplexF32}, scratchpad::ZePtr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZungqr(device_queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZungqr(device_queue::syclQueue_t, m::Int64, n::Int64, k::Int64, a::ZePtr{ComplexF64}, lda::Int64, tau::ZePtr{ComplexF64}, scratchpad::ZePtr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# unmqr: multiply a matrix by Q from a QR factorization (complex).
function onemklCunmqr(device_queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCunmqr(device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64, n::Int64, k::Int64, a::ZePtr{ComplexF32}, lda::Int64, tau::ZePtr{ComplexF32}, c::ZePtr{ComplexF32}, ldc::Int64, scratchpad::ZePtr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZunmqr(device_queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZunmqr(device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64, n::Int64, k::Int64, a::ZePtr{ComplexF64}, lda::Int64, tau::ZePtr{ComplexF64}, c::ZePtr{ComplexF64}, ldc::Int64, scratchpad::ZePtr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# gerqf: RQ factorization.
function onemklSgerqf(device_queue, m, n, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSgerqf(device_queue::syclQueue_t, m::Int64, n::Int64, a::Ptr{Cfloat}, lda::Int64, tau::Ptr{Cfloat}, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint
end

function onemklDgerqf(device_queue, m, n, a, lda, tau, scratchpad, scratchpad_size)
@ccall liboneapi_support.onemklDgerqf(device_queue::syclQueue_t, m::Int64, n::Int64, a::Ptr{Cdouble}, lda::Int64, tau::Ptr{Cdouble}, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint end

function onemklCgerqf(device_queue, m, n, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCgerqf(device_queue::syclQueue_t, m::Int64, n::Int64, a::Ptr{ComplexF32}, lda::Int64, tau::Ptr{ComplexF32}, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint
end

# NOTE(review): Z = double-precision complex; the Z* wrappers in this block
# (Zgerqf, Zunmrq, Zungtr, Zunmtr, Zgels) had ComplexF32 element types corrected
# to ComplexF64, matching the sibling Z wrappers (e.g. onemklZsytrf below).
# Pointer ABI is unchanged.
function onemklZgerqf(device_queue, m, n, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZgerqf(device_queue::syclQueue_t, m::Int64, n::Int64, a::Ptr{ComplexF64}, lda::Int64, tau::Ptr{ComplexF64}, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# ormrq / unmrq: multiply a matrix by Q from an RQ factorization.
function onemklSormrq(device_queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSormrq(device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64, n::Int64, k::Int64, a::Ptr{Cfloat}, lda::Int64, tau::Ptr{Cfloat}, c::Ptr{Cfloat}, ldc::Int64, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint
end

function onemklDormrq(device_queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDormrq(device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64, n::Int64, k::Int64, a::Ptr{Cdouble}, lda::Int64, tau::Ptr{Cdouble}, c::Ptr{Cdouble}, ldc::Int64, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklCunmrq(device_queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCunmrq(device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64, n::Int64, k::Int64, a::Ptr{ComplexF32}, lda::Int64, tau::Ptr{ComplexF32}, c::Ptr{ComplexF32}, ldc::Int64, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZunmrq(device_queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZunmrq(device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64, n::Int64, k::Int64, a::Ptr{ComplexF64}, lda::Int64, tau::Ptr{ComplexF64}, c::Ptr{ComplexF64}, ldc::Int64, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# sytrf: Bunch-Kaufman factorization of a symmetric matrix (device pointers).
function onemklSsytrf(device_queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSsytrf(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::ZePtr{Cfloat}, lda::Int64, ipiv::ZePtr{Int64}, scratchpad::ZePtr{Cfloat}, scratchpad_size::Int64)::Cint
end

function onemklDsytrf(device_queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDsytrf(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::ZePtr{Cdouble}, lda::Int64, ipiv::ZePtr{Int64}, scratchpad::ZePtr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklCsytrf(device_queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCsytrf(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::ZePtr{ComplexF32}, lda::Int64, ipiv::ZePtr{Int64}, scratchpad::ZePtr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZsytrf(device_queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZsytrf(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::ZePtr{ComplexF64}, lda::Int64, ipiv::ZePtr{Int64}, scratchpad::ZePtr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# orgtr / ungtr: generate the orthogonal/unitary matrix determined by sytrd/hetrd.
function onemklSorgtr(device_queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSorgtr(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{Cfloat}, lda::Int64, tau::Ptr{Cfloat}, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint
end

function onemklDorgtr(device_queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDorgtr(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{Cdouble}, lda::Int64, tau::Ptr{Cdouble}, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklCungtr(device_queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCungtr(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{ComplexF32}, lda::Int64, tau::Ptr{ComplexF32}, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZungtr(device_queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZungtr(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{ComplexF64}, lda::Int64, tau::Ptr{ComplexF64}, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# ormtr / unmtr: multiply a matrix by the Q determined by sytrd/hetrd.
function onemklSormtr(device_queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSormtr(device_queue::syclQueue_t, side::onemklSide, uplo::onemklUplo, trans::onemklTranspose, m::Int64, n::Int64, a::Ptr{Cfloat}, lda::Int64, tau::Ptr{Cfloat}, c::Ptr{Cfloat}, ldc::Int64, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint
end

function onemklDormtr(device_queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDormtr(device_queue::syclQueue_t, side::onemklSide, uplo::onemklUplo, trans::onemklTranspose, m::Int64, n::Int64, a::Ptr{Cdouble}, lda::Int64, tau::Ptr{Cdouble}, c::Ptr{Cdouble}, ldc::Int64, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklCunmtr(device_queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCunmtr(device_queue::syclQueue_t, side::onemklSide, uplo::onemklUplo, trans::onemklTranspose, m::Int64, n::Int64, a::Ptr{ComplexF32}, lda::Int64, tau::Ptr{ComplexF32}, c::Ptr{ComplexF32}, ldc::Int64, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZunmtr(device_queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZunmtr(device_queue::syclQueue_t, side::onemklSide, uplo::onemklUplo, trans::onemklTranspose, m::Int64, n::Int64, a::Ptr{ComplexF64}, lda::Int64, tau::Ptr{ComplexF64}, c::Ptr{ComplexF64}, ldc::Int64, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# gels: least-squares solve of an over/under-determined system via QR/LQ.
function onemklSgels(device_queue, trans, m, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSgels(device_queue::syclQueue_t, trans::onemklTranspose, m::Int64, n::Int64, nrhs::Int64, a::Ptr{Cfloat}, lda::Int64, b::Ptr{Cfloat}, ldb::Int64, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint
end

function onemklDgels(device_queue, trans, m, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDgels(device_queue::syclQueue_t, trans::onemklTranspose, m::Int64, n::Int64, nrhs::Int64, a::Ptr{Cdouble}, lda::Int64, b::Ptr{Cdouble}, ldb::Int64, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklCgels(device_queue, trans, m, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCgels(device_queue::syclQueue_t, trans::onemklTranspose, m::Int64, n::Int64, nrhs::Int64, a::Ptr{ComplexF32}, lda::Int64, b::Ptr{ComplexF32}, ldb::Int64, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZgels(device_queue, trans, m, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZgels(device_queue::syclQueue_t, trans::onemklTranspose, m::Int64, n::Int64, nrhs::Int64, a::Ptr{ComplexF64}, lda::Int64, b::Ptr{ComplexF64}, ldb::Int64, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint
end

function onemklSpotrf_batch(device_queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklSpotrf_batch(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, n::Ptr{Int64}, a::ZePtr{Ptr{Cfloat}}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{Cfloat},
scratchpad_size::Int64)::Cint end

# Grouped-batch potrf: per-group parameters are passed through host arrays,
# matrices through an array of device pointers.
function onemklDpotrf_batch(device_queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDpotrf_batch(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, n::Ptr{Int64}, a::ZePtr{Ptr{Cdouble}}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklCpotrf_batch(device_queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCpotrf_batch(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, n::Ptr{Int64}, a::ZePtr{Ptr{ComplexF32}}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZpotrf_batch(device_queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZpotrf_batch(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, n::Ptr{Int64}, a::ZePtr{Ptr{ComplexF64}}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# Grouped-batch potrs: solve with existing Cholesky factorizations.
function onemklSpotrs_batch(device_queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSpotrs_batch(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, n::Ptr{Int64}, nrhs::Ptr{Int64}, a::ZePtr{Ptr{Cfloat}}, lda::Ptr{Int64}, b::ZePtr{Ptr{Cfloat}}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{Cfloat}, scratchpad_size::Int64)::Cint
end

function onemklDpotrs_batch(device_queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDpotrs_batch(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, n::Ptr{Int64}, nrhs::Ptr{Int64}, a::ZePtr{Ptr{Cdouble}}, lda::Ptr{Int64}, b::ZePtr{Ptr{Cdouble}}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklCpotrs_batch(device_queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCpotrs_batch(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, n::Ptr{Int64}, nrhs::Ptr{Int64}, a::ZePtr{Ptr{ComplexF32}}, lda::Ptr{Int64}, b::ZePtr{Ptr{ComplexF32}}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZpotrs_batch(device_queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZpotrs_batch(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, n::Ptr{Int64}, nrhs::Ptr{Int64}, a::ZePtr{Ptr{ComplexF64}}, lda::Ptr{Int64}, b::ZePtr{Ptr{ComplexF64}}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# Grouped-batch geinv: general matrix inversion.
function onemklSgeinv_batch(device_queue, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSgeinv_batch(device_queue::syclQueue_t, n::Ptr{Int64}, a::Ptr{Ptr{Cfloat}}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint
end

function onemklDgeinv_batch(device_queue, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDgeinv_batch(device_queue::syclQueue_t, n::Ptr{Int64}, a::Ptr{Ptr{Cdouble}}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklCgeinv_batch(device_queue, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCgeinv_batch(device_queue::syclQueue_t, n::Ptr{Int64}, a::Ptr{Ptr{ComplexF32}}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint
end

# NOTE(review): Z = double-precision complex; corrected ComplexF32 -> ComplexF64
# to match the sibling Z wrappers (e.g. onemklZpotrf_batch above).
function onemklZgeinv_batch(device_queue, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklZgeinv_batch(device_queue::syclQueue_t, n::Ptr{Int64}, a::Ptr{Ptr{ComplexF64}}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint
end

# Grouped-batch getrs: solve with existing LU factorizations.
function onemklSgetrs_batch(device_queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklSgetrs_batch(device_queue::syclQueue_t, trans::Ptr{onemklTranspose}, n::Ptr{Int64}, nrhs::Ptr{Int64}, a::ZePtr{Ptr{Cfloat}}, lda::Ptr{Int64}, ipiv::ZePtr{Ptr{Int64}}, b::ZePtr{Ptr{Cfloat}}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{Cfloat}, scratchpad_size::Int64)::Cint
end

function onemklDgetrs_batch(device_queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklDgetrs_batch(device_queue::syclQueue_t, trans::Ptr{onemklTranspose}, n::Ptr{Int64}, nrhs::Ptr{Int64}, a::ZePtr{Ptr{Cdouble}}, lda::Ptr{Int64}, ipiv::ZePtr{Ptr{Int64}}, b::ZePtr{Ptr{Cdouble}}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{Cdouble}, scratchpad_size::Int64)::Cint
end

function onemklCgetrs_batch(device_queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size)
    @ccall liboneapi_support.onemklCgetrs_batch(device_queue::syclQueue_t, trans::Ptr{onemklTranspose}, n::Ptr{Int64}, nrhs::Ptr{Int64}, a::ZePtr{Ptr{ComplexF32}}, lda::Ptr{Int64}, ipiv::ZePtr{Ptr{Int64}}, b::ZePtr{Ptr{ComplexF32}}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{ComplexF32}, scratchpad_size::Int64)::Cint
end

function onemklZgetrs_batch(device_queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklZgetrs_batch(device_queue::syclQueue_t,
trans::Ptr{onemklTranspose}, n::Ptr{Int64}, nrhs::Ptr{Int64}, a::ZePtr{Ptr{ComplexF64}}, lda::Ptr{Int64}, ipiv::ZePtr{Ptr{Int64}}, b::ZePtr{Ptr{ComplexF64}}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{ComplexF64}, scratchpad_size::Int64)::Cint end

# Grouped-batch LU inversion (getri_batch) followed by batched QR factorization
# (geqrf_batch) and Q reconstruction (orgqr/ungqr_batch). Machine-generated
# bindings; do not edit by hand.
function onemklSgetri_batch(device_queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklSgetri_batch(device_queue::syclQueue_t, n::Ptr{Int64}, a::ZePtr{Ptr{Cfloat}}, lda::Ptr{Int64}, ipiv::ZePtr{Ptr{Int64}}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{Cfloat}, scratchpad_size::Int64)::Cint end function onemklDgetri_batch(device_queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklDgetri_batch(device_queue::syclQueue_t, n::Ptr{Int64}, a::ZePtr{Ptr{Cdouble}}, lda::Ptr{Int64}, ipiv::ZePtr{Ptr{Int64}}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{Cdouble}, scratchpad_size::Int64)::Cint end function onemklCgetri_batch(device_queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklCgetri_batch(device_queue::syclQueue_t, n::Ptr{Int64}, a::ZePtr{Ptr{ComplexF32}}, lda::Ptr{Int64}, ipiv::ZePtr{Ptr{Int64}}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{ComplexF32}, scratchpad_size::Int64)::Cint end function onemklZgetri_batch(device_queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklZgetri_batch(device_queue::syclQueue_t, n::Ptr{Int64}, a::ZePtr{Ptr{ComplexF64}}, lda::Ptr{Int64}, ipiv::ZePtr{Ptr{Int64}}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{ComplexF64}, scratchpad_size::Int64)::Cint end function onemklSgeqrf_batch(device_queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklSgeqrf_batch(device_queue::syclQueue_t, m::Ptr{Int64}, 
n::Ptr{Int64}, a::ZePtr{Ptr{Cfloat}}, lda::Ptr{Int64}, tau::ZePtr{Ptr{Cfloat}}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{Cfloat}, scratchpad_size::Int64)::Cint end function onemklDgeqrf_batch(device_queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklDgeqrf_batch(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, a::ZePtr{Ptr{Cdouble}}, lda::Ptr{Int64}, tau::ZePtr{Ptr{Cdouble}}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{Cdouble}, scratchpad_size::Int64)::Cint end function onemklCgeqrf_batch(device_queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklCgeqrf_batch(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, a::ZePtr{Ptr{ComplexF32}}, lda::Ptr{Int64}, tau::ZePtr{Ptr{ComplexF32}}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{ComplexF32}, scratchpad_size::Int64)::Cint end function onemklZgeqrf_batch(device_queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklZgeqrf_batch(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, a::ZePtr{Ptr{ComplexF64}}, lda::Ptr{Int64}, tau::ZePtr{Ptr{ComplexF64}}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{ComplexF64}, scratchpad_size::Int64)::Cint end function onemklSorgqr_batch(device_queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklSorgqr_batch(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, k::Ptr{Int64}, a::ZePtr{Ptr{Cfloat}}, lda::Ptr{Int64}, tau::ZePtr{Ptr{Cfloat}}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{Cfloat}, scratchpad_size::Int64)::Cint end function onemklDorgqr_batch(device_queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklDorgqr_batch(device_queue::syclQueue_t, m::Ptr{Int64}, 
n::Ptr{Int64}, k::Ptr{Int64}, a::ZePtr{Ptr{Cdouble}}, lda::Ptr{Int64}, tau::ZePtr{Ptr{Cdouble}}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{Cdouble}, scratchpad_size::Int64)::Cint end function onemklCungqr_batch(device_queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklCungqr_batch(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, k::Ptr{Int64}, a::ZePtr{Ptr{ComplexF32}}, lda::Ptr{Int64}, tau::ZePtr{Ptr{ComplexF32}}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{ComplexF32}, scratchpad_size::Int64)::Cint end function onemklZungqr_batch(device_queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklZungqr_batch(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, k::Ptr{Int64}, a::ZePtr{Ptr{ComplexF64}}, lda::Ptr{Int64}, tau::ZePtr{Ptr{ComplexF64}}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::ZePtr{ComplexF64}, scratchpad_size::Int64)::Cint end function onemklSormqr_batch(device_queue, side, trans, m, n, k, a, lda, tau, c, ldc, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklSormqr_batch(device_queue::syclQueue_t, side::Ptr{onemklSide}, trans::Ptr{onemklTranspose}, m::Ptr{Int64}, n::Ptr{Int64}, k::Ptr{Int64}, a::Ptr{Ptr{Cfloat}}, lda::Ptr{Int64}, tau::Ptr{Ptr{Cfloat}}, c::Ptr{Ptr{Cfloat}}, ldc::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint end function onemklDormqr_batch(device_queue, side, trans, m, n, k, a, lda, tau, c, ldc, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklDormqr_batch(device_queue::syclQueue_t, side::Ptr{onemklSide}, trans::Ptr{onemklTranspose}, m::Ptr{Int64}, n::Ptr{Int64}, k::Ptr{Int64}, a::Ptr{Ptr{Cdouble}}, lda::Ptr{Int64}, tau::Ptr{Ptr{Cdouble}}, c::Ptr{Ptr{Cdouble}}, ldc::Ptr{Int64}, group_count::Int64, 
group_sizes::Ptr{Int64}, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint end function onemklCunmqr_batch(device_queue, side, trans, m, n, k, a, lda, tau, c, ldc, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklCunmqr_batch(device_queue::syclQueue_t, side::Ptr{onemklSide}, trans::Ptr{onemklTranspose}, m::Ptr{Int64}, n::Ptr{Int64}, k::Ptr{Int64}, a::Ptr{Ptr{ComplexF32}}, lda::Ptr{Int64}, tau::Ptr{Ptr{ComplexF32}}, c::Ptr{Ptr{ComplexF32}}, ldc::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint end

# NOTE(review): every Z ("double complex") wrapper below originally declared its
# complex array/scratchpad element type as ComplexF32. They have been corrected
# to ComplexF64, matching the LAPACK Z naming convention and the other Z
# wrappers in this file (e.g. onemklZgetrs_batch, onemklZgeqrf_batch). Only the
# pointer element types change; the C ABI of the ccalls is unaffected.
function onemklZunmqr_batch(device_queue, side, trans, m, n, k, a, lda, tau, c, ldc, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklZunmqr_batch(device_queue::syclQueue_t, side::Ptr{onemklSide}, trans::Ptr{onemklTranspose}, m::Ptr{Int64}, n::Ptr{Int64}, k::Ptr{Int64}, a::Ptr{Ptr{ComplexF64}}, lda::Ptr{Int64}, tau::Ptr{Ptr{ComplexF64}}, c::Ptr{Ptr{ComplexF64}}, ldc::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint end function onemklStrtrs_batch(device_queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklStrtrs_batch(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, trans::Ptr{onemklTranspose}, diag::Ptr{onemklDiag}, n::Ptr{Int64}, nrhs::Ptr{Int64}, a::Ptr{Ptr{Cfloat}}, lda::Ptr{Int64}, b::Ptr{Ptr{Cfloat}}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint end function onemklDtrtrs_batch(device_queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklDtrtrs_batch(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, trans::Ptr{onemklTranspose}, diag::Ptr{onemklDiag}, n::Ptr{Int64}, nrhs::Ptr{Int64}, a::Ptr{Ptr{Cdouble}}, lda::Ptr{Int64}, 
b::Ptr{Ptr{Cdouble}}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint end function onemklCtrtrs_batch(device_queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklCtrtrs_batch(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, trans::Ptr{onemklTranspose}, diag::Ptr{onemklDiag}, n::Ptr{Int64}, nrhs::Ptr{Int64}, a::Ptr{Ptr{ComplexF32}}, lda::Ptr{Int64}, b::Ptr{Ptr{ComplexF32}}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint end function onemklZtrtrs_batch(device_queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklZtrtrs_batch(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, trans::Ptr{onemklTranspose}, diag::Ptr{onemklDiag}, n::Ptr{Int64}, nrhs::Ptr{Int64}, a::Ptr{Ptr{ComplexF64}}, lda::Ptr{Int64}, b::Ptr{Ptr{ComplexF64}}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint end function onemklSgels_batch(device_queue, trans, m, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklSgels_batch(device_queue::syclQueue_t, trans::Ptr{onemklTranspose}, m::Ptr{Int64}, n::Ptr{Int64}, nrhs::Ptr{Int64}, a::Ptr{Ptr{Cfloat}}, lda::Ptr{Int64}, b::Ptr{Ptr{Cfloat}}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint end function onemklDgels_batch(device_queue, trans, m, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklDgels_batch(device_queue::syclQueue_t, trans::Ptr{onemklTranspose}, m::Ptr{Int64}, n::Ptr{Int64}, nrhs::Ptr{Int64}, a::Ptr{Ptr{Cdouble}}, lda::Ptr{Int64}, b::Ptr{Ptr{Cdouble}}, ldb::Ptr{Int64}, 
group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint end function onemklCgels_batch(device_queue, trans, m, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklCgels_batch(device_queue::syclQueue_t, trans::Ptr{onemklTranspose}, m::Ptr{Int64}, n::Ptr{Int64}, nrhs::Ptr{Int64}, a::Ptr{Ptr{ComplexF32}}, lda::Ptr{Int64}, b::Ptr{Ptr{ComplexF32}}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint end function onemklZgels_batch(device_queue, trans, m, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklZgels_batch(device_queue::syclQueue_t, trans::Ptr{onemklTranspose}, m::Ptr{Int64}, n::Ptr{Int64}, nrhs::Ptr{Int64}, a::Ptr{Ptr{ComplexF64}}, lda::Ptr{Int64}, b::Ptr{Ptr{ComplexF64}}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64}, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint end function onemklSpotrf_batch_strided(device_queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklSpotrf_batch_strided(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{Cfloat}, lda::Int64, stride_a::Int64, batch_size::Int64, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint end function onemklDpotrf_batch_strided(device_queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklDpotrf_batch_strided(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{Cdouble}, lda::Int64, stride_a::Int64, batch_size::Int64, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint end function onemklCpotrf_batch_strided(device_queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklCpotrf_batch_strided(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{ComplexF32}, lda::Int64, 
stride_a::Int64, batch_size::Int64, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint end function onemklZpotrf_batch_strided(device_queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklZpotrf_batch_strided(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, a::Ptr{ComplexF64}, lda::Int64, stride_a::Int64, batch_size::Int64, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint end function onemklSpotrs_batch_strided(device_queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklSpotrs_batch_strided(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, nrhs::Int64, a::Ptr{Cfloat}, lda::Int64, stride_a::Int64, b::Ptr{Cfloat}, ldb::Int64, stride_b::Int64, batch_size::Int64, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint end function onemklDpotrs_batch_strided(device_queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklDpotrs_batch_strided(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, nrhs::Int64, a::Ptr{Cdouble}, lda::Int64, stride_a::Int64, b::Ptr{Cdouble}, ldb::Int64, stride_b::Int64, batch_size::Int64, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint end function onemklCpotrs_batch_strided(device_queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklCpotrs_batch_strided(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, nrhs::Int64, a::Ptr{ComplexF32}, lda::Int64, stride_a::Int64, b::Ptr{ComplexF32}, ldb::Int64, stride_b::Int64, batch_size::Int64, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint end function onemklZpotrs_batch_strided(device_queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklZpotrs_batch_strided(device_queue::syclQueue_t, uplo::onemklUplo, 
n::Int64, nrhs::Int64, a::Ptr{ComplexF64}, lda::Int64, stride_a::Int64, b::Ptr{ComplexF64}, ldb::Int64, stride_b::Int64, batch_size::Int64, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint end function onemklSgeqrf_batch_strided(device_queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklSgeqrf_batch_strided(device_queue::syclQueue_t, m::Int64, n::Int64, a::Ptr{Cfloat}, lda::Int64, stride_a::Int64, tau::Ptr{Cfloat}, stride_tau::Int64, batch_size::Int64, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint end function onemklDgeqrf_batch_strided(device_queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklDgeqrf_batch_strided(device_queue::syclQueue_t, m::Int64, n::Int64, a::Ptr{Cdouble}, lda::Int64, stride_a::Int64, tau::Ptr{Cdouble}, stride_tau::Int64, batch_size::Int64, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint end function onemklCgeqrf_batch_strided(device_queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklCgeqrf_batch_strided(device_queue::syclQueue_t, m::Int64, n::Int64, a::Ptr{ComplexF32}, lda::Int64, stride_a::Int64, tau::Ptr{ComplexF32}, stride_tau::Int64, batch_size::Int64, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint end function onemklZgeqrf_batch_strided(device_queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklZgeqrf_batch_strided(device_queue::syclQueue_t, m::Int64, n::Int64, a::Ptr{ComplexF64}, lda::Int64, stride_a::Int64, tau::Ptr{ComplexF64}, stride_tau::Int64, batch_size::Int64, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint end function onemklSorgqr_batch_strided(device_queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size) @ccall 
liboneapi_support.onemklSorgqr_batch_strided(device_queue::syclQueue_t, m::Int64, n::Int64, k::Int64, a::Ptr{Cfloat}, lda::Int64, stride_a::Int64, tau::Ptr{Cfloat}, stride_tau::Int64, batch_size::Int64, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint end function onemklDorgqr_batch_strided(device_queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklDorgqr_batch_strided(device_queue::syclQueue_t, m::Int64, n::Int64, k::Int64, a::Ptr{Cdouble}, lda::Int64, stride_a::Int64, tau::Ptr{Cdouble}, stride_tau::Int64, batch_size::Int64, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint end function onemklCungqr_batch_strided(device_queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklCungqr_batch_strided(device_queue::syclQueue_t, m::Int64, n::Int64, k::Int64, a::Ptr{ComplexF32}, lda::Int64, stride_a::Int64, tau::Ptr{ComplexF32}, stride_tau::Int64, batch_size::Int64, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint end function onemklZungqr_batch_strided(device_queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklZungqr_batch_strided(device_queue::syclQueue_t, m::Int64, n::Int64, k::Int64, a::Ptr{ComplexF64}, lda::Int64, stride_a::Int64, tau::Ptr{ComplexF64}, stride_tau::Int64, batch_size::Int64, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint end function onemklSgetri_batch_strided(device_queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklSgetri_batch_strided(device_queue::syclQueue_t, n::Int64, a::Ptr{Cfloat}, lda::Int64, stride_a::Int64, ipiv::Ptr{Int64}, stride_ipiv::Int64, batch_size::Int64, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint end function onemklDgetri_batch_strided(device_queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, 
scratchpad_size) @ccall liboneapi_support.onemklDgetri_batch_strided(device_queue::syclQueue_t, n::Int64, a::Ptr{Cdouble}, lda::Int64, stride_a::Int64, ipiv::Ptr{Int64}, stride_ipiv::Int64, batch_size::Int64, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint end function onemklCgetri_batch_strided(device_queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklCgetri_batch_strided(device_queue::syclQueue_t, n::Int64, a::Ptr{ComplexF32}, lda::Int64, stride_a::Int64, ipiv::Ptr{Int64}, stride_ipiv::Int64, batch_size::Int64, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint end function onemklZgetri_batch_strided(device_queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklZgetri_batch_strided(device_queue::syclQueue_t, n::Int64, a::Ptr{ComplexF64}, lda::Int64, stride_a::Int64, ipiv::Ptr{Int64}, stride_ipiv::Int64, batch_size::Int64, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint end function onemklSgels_batch_strided(device_queue, trans, m, n, nrhs, _a, lda, stride_a, _b, ldb, stride_b, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklSgels_batch_strided(device_queue::syclQueue_t, trans::onemklTranspose, m::Int64, n::Int64, nrhs::Int64, _a::Ptr{Cfloat}, lda::Int64, stride_a::Int64, _b::Ptr{Cfloat}, ldb::Int64, stride_b::Int64, batch_size::Int64, scratchpad::Ptr{Cfloat}, scratchpad_size::Int64)::Cint end function onemklDgels_batch_strided(device_queue, trans, m, n, nrhs, _a, lda, stride_a, _b, ldb, stride_b, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklDgels_batch_strided(device_queue::syclQueue_t, trans::onemklTranspose, m::Int64, n::Int64, nrhs::Int64, _a::Ptr{Cdouble}, lda::Int64, stride_a::Int64, _b::Ptr{Cdouble}, ldb::Int64, stride_b::Int64, batch_size::Int64, scratchpad::Ptr{Cdouble}, scratchpad_size::Int64)::Cint end function 
onemklCgels_batch_strided(device_queue, trans, m, n, nrhs, _a, lda, stride_a, _b, ldb, stride_b, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklCgels_batch_strided(device_queue::syclQueue_t, trans::onemklTranspose, m::Int64, n::Int64, nrhs::Int64, _a::Ptr{ComplexF32}, lda::Int64, stride_a::Int64, _b::Ptr{ComplexF32}, ldb::Int64, stride_b::Int64, batch_size::Int64, scratchpad::Ptr{ComplexF32}, scratchpad_size::Int64)::Cint end function onemklZgels_batch_strided(device_queue, trans, m, n, nrhs, _a, lda, stride_a, _b, ldb, stride_b, batch_size, scratchpad, scratchpad_size) @ccall liboneapi_support.onemklZgels_batch_strided(device_queue::syclQueue_t, trans::onemklTranspose, m::Int64, n::Int64, nrhs::Int64, _a::Ptr{ComplexF64}, lda::Int64, stride_a::Int64, _b::Ptr{ComplexF64}, ldb::Int64, stride_b::Int64, batch_size::Int64, scratchpad::Ptr{ComplexF64}, scratchpad_size::Int64)::Cint end function onemklSgebrd_scratchpad_size(device_queue, m, n, lda) @ccall liboneapi_support.onemklSgebrd_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64 end function onemklDgebrd_scratchpad_size(device_queue, m, n, lda) @ccall liboneapi_support.onemklDgebrd_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64 end function onemklCgebrd_scratchpad_size(device_queue, m, n, lda) @ccall liboneapi_support.onemklCgebrd_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64 end function onemklZgebrd_scratchpad_size(device_queue, m, n, lda) @ccall liboneapi_support.onemklZgebrd_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64 end function onemklSgels_scratchpad_size(device_queue, trans, m, n, nrhs, lda, ldb) @ccall liboneapi_support.onemklSgels_scratchpad_size(device_queue::syclQueue_t, trans::onemklTranspose, m::Int64, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64 end function onemklDgels_scratchpad_size(device_queue, trans, m, n, nrhs, lda, ldb) 
@ccall liboneapi_support.onemklDgels_scratchpad_size(device_queue::syclQueue_t, trans::onemklTranspose, m::Int64, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64 end function onemklCgels_scratchpad_size(device_queue, trans, m, n, nrhs, lda, ldb) @ccall liboneapi_support.onemklCgels_scratchpad_size(device_queue::syclQueue_t, trans::onemklTranspose, m::Int64, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64 end function onemklZgels_scratchpad_size(device_queue, trans, m, n, nrhs, lda, ldb) @ccall liboneapi_support.onemklZgels_scratchpad_size(device_queue::syclQueue_t, trans::onemklTranspose, m::Int64, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64 end function onemklSgeqrf_scratchpad_size(device_queue, m, n, lda) @ccall liboneapi_support.onemklSgeqrf_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64 end function onemklDgeqrf_scratchpad_size(device_queue, m, n, lda) @ccall liboneapi_support.onemklDgeqrf_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64 end function onemklCgeqrf_scratchpad_size(device_queue, m, n, lda) @ccall liboneapi_support.onemklCgeqrf_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64 end function onemklZgeqrf_scratchpad_size(device_queue, m, n, lda) @ccall liboneapi_support.onemklZgeqrf_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64 end function onemklSgerqf_scratchpad_size(device_queue, m, n, lda) @ccall liboneapi_support.onemklSgerqf_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64 end function onemklDgerqf_scratchpad_size(device_queue, m, n, lda) @ccall liboneapi_support.onemklDgerqf_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64 end function onemklCgerqf_scratchpad_size(device_queue, m, n, lda) @ccall liboneapi_support.onemklCgerqf_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64 end function 
onemklZgerqf_scratchpad_size(device_queue, m, n, lda) @ccall liboneapi_support.onemklZgerqf_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64 end function onemklSgesv_scratchpad_size(device_queue, n, nrhs, lda, ldb) @ccall liboneapi_support.onemklSgesv_scratchpad_size(device_queue::syclQueue_t, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64 end function onemklDgesv_scratchpad_size(device_queue, n, nrhs, lda, ldb) @ccall liboneapi_support.onemklDgesv_scratchpad_size(device_queue::syclQueue_t, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64 end function onemklCgesv_scratchpad_size(device_queue, n, nrhs, lda, ldb) @ccall liboneapi_support.onemklCgesv_scratchpad_size(device_queue::syclQueue_t, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64 end function onemklZgesv_scratchpad_size(device_queue, n, nrhs, lda, ldb) @ccall liboneapi_support.onemklZgesv_scratchpad_size(device_queue::syclQueue_t, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64 end function onemklSgesvd_scratchpad_size(device_queue, jobu, jobvt, m, n, lda, ldu, ldvt) @ccall liboneapi_support.onemklSgesvd_scratchpad_size(device_queue::syclQueue_t, jobu::onemklJobsvd, jobvt::onemklJobsvd, m::Int64, n::Int64, lda::Int64, ldu::Int64, ldvt::Int64)::Int64 end function onemklDgesvd_scratchpad_size(device_queue, jobu, jobvt, m, n, lda, ldu, ldvt) @ccall liboneapi_support.onemklDgesvd_scratchpad_size(device_queue::syclQueue_t, jobu::onemklJobsvd, jobvt::onemklJobsvd, m::Int64, n::Int64, lda::Int64, ldu::Int64, ldvt::Int64)::Int64 end function onemklCgesvd_scratchpad_size(device_queue, jobu, jobvt, m, n, lda, ldu, ldvt) @ccall liboneapi_support.onemklCgesvd_scratchpad_size(device_queue::syclQueue_t, jobu::onemklJobsvd, jobvt::onemklJobsvd, m::Int64, n::Int64, lda::Int64, ldu::Int64, ldvt::Int64)::Int64 end function onemklZgesvd_scratchpad_size(device_queue, jobu, jobvt, m, n, lda, ldu, ldvt) @ccall 
liboneapi_support.onemklZgesvd_scratchpad_size(device_queue::syclQueue_t, jobu::onemklJobsvd, jobvt::onemklJobsvd, m::Int64, n::Int64, lda::Int64, ldu::Int64, ldvt::Int64)::Int64 end function onemklSgetrf_scratchpad_size(device_queue, m, n, lda) @ccall liboneapi_support.onemklSgetrf_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64 end function onemklDgetrf_scratchpad_size(device_queue, m, n, lda) @ccall liboneapi_support.onemklDgetrf_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64 end function onemklCgetrf_scratchpad_size(device_queue, m, n, lda) @ccall liboneapi_support.onemklCgetrf_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64 end function onemklZgetrf_scratchpad_size(device_queue, m, n, lda) @ccall liboneapi_support.onemklZgetrf_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64 end function onemklSgetrfnp_scratchpad_size(device_queue, m, n, lda) @ccall liboneapi_support.onemklSgetrfnp_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64 end function onemklDgetrfnp_scratchpad_size(device_queue, m, n, lda) @ccall liboneapi_support.onemklDgetrfnp_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64 end function onemklCgetrfnp_scratchpad_size(device_queue, m, n, lda) @ccall liboneapi_support.onemklCgetrfnp_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64 end function onemklZgetrfnp_scratchpad_size(device_queue, m, n, lda) @ccall liboneapi_support.onemklZgetrfnp_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64)::Int64 end function onemklSgetri_scratchpad_size(device_queue, n, lda) @ccall liboneapi_support.onemklSgetri_scratchpad_size(device_queue::syclQueue_t, n::Int64, lda::Int64)::Int64 end function onemklDgetri_scratchpad_size(device_queue, n, lda) @ccall 
liboneapi_support.onemklDgetri_scratchpad_size(device_queue::syclQueue_t, n::Int64, lda::Int64)::Int64 end function onemklCgetri_scratchpad_size(device_queue, n, lda) @ccall liboneapi_support.onemklCgetri_scratchpad_size(device_queue::syclQueue_t, n::Int64, lda::Int64)::Int64 end function onemklZgetri_scratchpad_size(device_queue, n, lda) @ccall liboneapi_support.onemklZgetri_scratchpad_size(device_queue::syclQueue_t, n::Int64, lda::Int64)::Int64 end function onemklSgetrs_scratchpad_size(device_queue, trans, n, nrhs, lda, ldb) @ccall liboneapi_support.onemklSgetrs_scratchpad_size(device_queue::syclQueue_t, trans::onemklTranspose, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64 end function onemklDgetrs_scratchpad_size(device_queue, trans, n, nrhs, lda, ldb) @ccall liboneapi_support.onemklDgetrs_scratchpad_size(device_queue::syclQueue_t, trans::onemklTranspose, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64 end function onemklCgetrs_scratchpad_size(device_queue, trans, n, nrhs, lda, ldb) @ccall liboneapi_support.onemklCgetrs_scratchpad_size(device_queue::syclQueue_t, trans::onemklTranspose, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64 end function onemklZgetrs_scratchpad_size(device_queue, trans, n, nrhs, lda, ldb) @ccall liboneapi_support.onemklZgetrs_scratchpad_size(device_queue::syclQueue_t, trans::onemklTranspose, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64 end function onemklCheev_scratchpad_size(device_queue, jobz, uplo, n, lda) @ccall liboneapi_support.onemklCheev_scratchpad_size(device_queue::syclQueue_t, jobz::onemklCompz, uplo::onemklUplo, n::Int64, lda::Int64)::Int64 end function onemklZheev_scratchpad_size(device_queue, jobz, uplo, n, lda) @ccall liboneapi_support.onemklZheev_scratchpad_size(device_queue::syclQueue_t, jobz::onemklCompz, uplo::onemklUplo, n::Int64, lda::Int64)::Int64 end function onemklCheevd_scratchpad_size(device_queue, jobz, uplo, n, lda) @ccall 
liboneapi_support.onemklCheevd_scratchpad_size(device_queue::syclQueue_t, jobz::onemklJob, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

# NOTE(review): these `onemkl*_scratchpad_size` wrappers look auto-generated
# (presumably by deps/generate_interfaces.jl) — edit the generator, not this
# file; confirm before hand-modifying. Each wrapper forwards to the C shim in
# liboneapi_support and returns, as Int64, the scratchpad (workspace) size the
# corresponding oneMKL LAPACK routine requires for the given problem sizes.

# Complex Hermitian eigensolvers: heevd/heevx and generalized hegvd/hegvx.
function onemklZheevd_scratchpad_size(device_queue, jobz, uplo, n, lda)
    @ccall liboneapi_support.onemklZheevd_scratchpad_size(device_queue::syclQueue_t, jobz::onemklJob, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

# vl/vu and abstol are passed by pointer (single-precision for the C variant).
function onemklCheevx_scratchpad_size(device_queue, jobz, range, uplo, n, lda, vl, vu, il, iu, abstol, ldz)
    @ccall liboneapi_support.onemklCheevx_scratchpad_size(device_queue::syclQueue_t, jobz::onemklCompz, range::onemklRangev, uplo::onemklUplo, n::Int64, lda::Int64, vl::Ptr{Cfloat}, vu::Ptr{Cfloat}, il::Int64, iu::Int64, abstol::Ptr{Cfloat}, ldz::Int64)::Int64
end

function onemklZheevx_scratchpad_size(device_queue, jobz, range, uplo, n, lda, vl, vu, il, iu, abstol, ldz)
    @ccall liboneapi_support.onemklZheevx_scratchpad_size(device_queue::syclQueue_t, jobz::onemklCompz, range::onemklRangev, uplo::onemklUplo, n::Int64, lda::Int64, vl::Ptr{Cdouble}, vu::Ptr{Cdouble}, il::Int64, iu::Int64, abstol::Ptr{Cdouble}, ldz::Int64)::Int64
end

function onemklChegvd_scratchpad_size(device_queue, itype, jobz, uplo, n, lda, ldb)
    @ccall liboneapi_support.onemklChegvd_scratchpad_size(device_queue::syclQueue_t, itype::Int64, jobz::onemklJob, uplo::onemklUplo, n::Int64, lda::Int64, ldb::Int64)::Int64
end

function onemklZhegvd_scratchpad_size(device_queue, itype, jobz, uplo, n, lda, ldb)
    @ccall liboneapi_support.onemklZhegvd_scratchpad_size(device_queue::syclQueue_t, itype::Int64, jobz::onemklJob, uplo::onemklUplo, n::Int64, lda::Int64, ldb::Int64)::Int64
end

function onemklChegvx_scratchpad_size(device_queue, itype, jobz, range, uplo, n, lda, ldb, vl, vu, il, iu, abstol, ldz)
    @ccall liboneapi_support.onemklChegvx_scratchpad_size(device_queue::syclQueue_t, itype::Int64, jobz::onemklCompz, range::onemklRangev, uplo::onemklUplo, n::Int64, lda::Int64, ldb::Int64, vl::Ptr{Cfloat}, vu::Ptr{Cfloat}, il::Int64, iu::Int64, abstol::Ptr{Cfloat}, ldz::Int64)::Int64
end

function onemklZhegvx_scratchpad_size(device_queue, itype, jobz, range, uplo, n, lda, ldb, vl, vu, il, iu, abstol, ldz)
    @ccall liboneapi_support.onemklZhegvx_scratchpad_size(device_queue::syclQueue_t, itype::Int64, jobz::onemklCompz, range::onemklRangev, uplo::onemklUplo, n::Int64, lda::Int64, ldb::Int64, vl::Ptr{Cdouble}, vu::Ptr{Cdouble}, il::Int64, iu::Int64, abstol::Ptr{Cdouble}, ldz::Int64)::Int64
end

# Hermitian tridiagonal reduction (hetrd) and hetrf factorization.
function onemklChetrd_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklChetrd_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklZhetrd_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklZhetrd_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklChetrf_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklChetrf_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklZhetrf_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklZhetrf_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

# Real orthogonal factor generation (orgbr/orgqr/orgtr) and application (orm*).
function onemklSorgbr_scratchpad_size(device_queue, vect, m, n, k, lda)
    @ccall liboneapi_support.onemklSorgbr_scratchpad_size(device_queue::syclQueue_t, vect::onemklGenerate, m::Int64, n::Int64, k::Int64, lda::Int64)::Int64
end

function onemklDorgbr_scratchpad_size(device_queue, vect, m, n, k, lda)
    @ccall liboneapi_support.onemklDorgbr_scratchpad_size(device_queue::syclQueue_t, vect::onemklGenerate, m::Int64, n::Int64, k::Int64, lda::Int64)::Int64
end

function onemklSorgqr_scratchpad_size(device_queue, m, n, k, lda)
    @ccall liboneapi_support.onemklSorgqr_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, k::Int64, lda::Int64)::Int64
end

function onemklDorgqr_scratchpad_size(device_queue, m, n, k, lda)
    @ccall liboneapi_support.onemklDorgqr_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, k::Int64, lda::Int64)::Int64
end

function onemklSorgtr_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklSorgtr_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklDorgtr_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklDorgtr_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklSormqr_scratchpad_size(device_queue, side, trans, m, n, k, lda, ldc)
    @ccall liboneapi_support.onemklSormqr_scratchpad_size(device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64, n::Int64, k::Int64, lda::Int64, ldc::Int64)::Int64
end

function onemklDormqr_scratchpad_size(device_queue, side, trans, m, n, k, lda, ldc)
    @ccall liboneapi_support.onemklDormqr_scratchpad_size(device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64, n::Int64, k::Int64, lda::Int64, ldc::Int64)::Int64
end

function onemklSormrq_scratchpad_size(device_queue, side, trans, m, n, k, lda, ldc)
    @ccall liboneapi_support.onemklSormrq_scratchpad_size(device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64, n::Int64, k::Int64, lda::Int64, ldc::Int64)::Int64
end

function onemklDormrq_scratchpad_size(device_queue, side, trans, m, n, k, lda, ldc)
    @ccall liboneapi_support.onemklDormrq_scratchpad_size(device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64, n::Int64, k::Int64, lda::Int64, ldc::Int64)::Int64
end

function onemklSormtr_scratchpad_size(device_queue, side, uplo, trans, m, n, lda, ldc)
    @ccall liboneapi_support.onemklSormtr_scratchpad_size(device_queue::syclQueue_t, side::onemklSide, uplo::onemklUplo, trans::onemklTranspose, m::Int64, n::Int64, lda::Int64, ldc::Int64)::Int64
end

function onemklDormtr_scratchpad_size(device_queue, side, uplo, trans, m, n, lda, ldc)
    @ccall
liboneapi_support.onemklDormtr_scratchpad_size(device_queue::syclQueue_t, side::onemklSide, uplo::onemklUplo, trans::onemklTranspose, m::Int64, n::Int64, lda::Int64, ldc::Int64)::Int64
end

# Cholesky factorization (potrf), inverse (potri) and solve (potrs),
# one wrapper per precision: S/D real, C/Z complex. Each returns the
# Int64 scratchpad size reported by the liboneapi_support C shim.
function onemklSpotrf_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklSpotrf_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklDpotrf_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklDpotrf_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklCpotrf_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklCpotrf_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklZpotrf_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklZpotrf_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklSpotri_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklSpotri_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklDpotri_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklDpotri_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklCpotri_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklCpotri_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklZpotri_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklZpotri_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklSpotrs_scratchpad_size(device_queue, uplo, n, nrhs, lda, ldb)
    @ccall liboneapi_support.onemklSpotrs_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64
end

function onemklDpotrs_scratchpad_size(device_queue, uplo, n, nrhs, lda, ldb)
    @ccall liboneapi_support.onemklDpotrs_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64
end

function onemklCpotrs_scratchpad_size(device_queue, uplo, n, nrhs, lda, ldb)
    @ccall liboneapi_support.onemklCpotrs_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64
end

function onemklZpotrs_scratchpad_size(device_queue, uplo, n, nrhs, lda, ldb)
    @ccall liboneapi_support.onemklZpotrs_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64
end

# Tridiagonal eigensolver (steqr).
function onemklSsteqr_scratchpad_size(device_queue, compz, n, ldz)
    @ccall liboneapi_support.onemklSsteqr_scratchpad_size(device_queue::syclQueue_t, compz::onemklCompz, n::Int64, ldz::Int64)::Int64
end

function onemklDsteqr_scratchpad_size(device_queue, compz, n, ldz)
    @ccall liboneapi_support.onemklDsteqr_scratchpad_size(device_queue::syclQueue_t, compz::onemklCompz, n::Int64, ldz::Int64)::Int64
end

function onemklCsteqr_scratchpad_size(device_queue, compz, n, ldz)
    @ccall liboneapi_support.onemklCsteqr_scratchpad_size(device_queue::syclQueue_t, compz::onemklCompz, n::Int64, ldz::Int64)::Int64
end

function onemklZsteqr_scratchpad_size(device_queue, compz, n, ldz)
    @ccall liboneapi_support.onemklZsteqr_scratchpad_size(device_queue::syclQueue_t, compz::onemklCompz, n::Int64, ldz::Int64)::Int64
end

# Real symmetric eigensolvers. Note the jobz C-type differs between the
# variants below: syev passes onemklCompz, syevd passes onemklJob — this
# mirrors the shim's declarations and must not be "harmonized" here.
function onemklSsyev_scratchpad_size(device_queue, jobz, uplo, n, lda)
    @ccall liboneapi_support.onemklSsyev_scratchpad_size(device_queue::syclQueue_t, jobz::onemklCompz, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklDsyev_scratchpad_size(device_queue, jobz, uplo, n, lda)
    @ccall liboneapi_support.onemklDsyev_scratchpad_size(device_queue::syclQueue_t, jobz::onemklCompz, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklSsyevd_scratchpad_size(device_queue, jobz, uplo, n, lda)
    @ccall liboneapi_support.onemklSsyevd_scratchpad_size(device_queue::syclQueue_t, jobz::onemklJob, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklDsyevd_scratchpad_size(device_queue, jobz, uplo, n, lda)
    @ccall liboneapi_support.onemklDsyevd_scratchpad_size(device_queue::syclQueue_t, jobz::onemklJob, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklSsyevx_scratchpad_size(device_queue, jobz, range, uplo, n, lda, vl, vu, il, iu, abstol, ldz)
    @ccall liboneapi_support.onemklSsyevx_scratchpad_size(device_queue::syclQueue_t, jobz::onemklCompz, range::onemklRangev, uplo::onemklUplo, n::Int64, lda::Int64, vl::Ptr{Cfloat}, vu::Ptr{Cfloat}, il::Int64, iu::Int64, abstol::Ptr{Cfloat}, ldz::Int64)::Int64
end

function onemklDsyevx_scratchpad_size(device_queue, jobz, range, uplo, n, lda, vl, vu, il, iu, abstol, ldz)
    @ccall liboneapi_support.onemklDsyevx_scratchpad_size(device_queue::syclQueue_t, jobz::onemklCompz, range::onemklRangev, uplo::onemklUplo, n::Int64, lda::Int64, vl::Ptr{Cdouble}, vu::Ptr{Cdouble}, il::Int64, iu::Int64, abstol::Ptr{Cdouble}, ldz::Int64)::Int64
end

# Generalized symmetric-definite eigensolvers (sygvd/sygvx).
function onemklSsygvd_scratchpad_size(device_queue, itype, jobz, uplo, n, lda, ldb)
    @ccall liboneapi_support.onemklSsygvd_scratchpad_size(device_queue::syclQueue_t, itype::Int64, jobz::onemklJob, uplo::onemklUplo, n::Int64, lda::Int64, ldb::Int64)::Int64
end

function onemklDsygvd_scratchpad_size(device_queue, itype, jobz, uplo, n, lda, ldb)
    @ccall liboneapi_support.onemklDsygvd_scratchpad_size(device_queue::syclQueue_t, itype::Int64, jobz::onemklJob, uplo::onemklUplo, n::Int64, lda::Int64, ldb::Int64)::Int64
end

function onemklSsygvx_scratchpad_size(device_queue, itype, jobz, range, uplo, n, lda, ldb, vl, vu, il, iu, abstol, ldz)
    @ccall liboneapi_support.onemklSsygvx_scratchpad_size(device_queue::syclQueue_t, itype::Int64, jobz::onemklCompz, range::onemklRangev, uplo::onemklUplo, n::Int64, lda::Int64, ldb::Int64,
vl::Ptr{Cfloat}, vu::Ptr{Cfloat}, il::Int64, iu::Int64, abstol::Ptr{Cfloat}, ldz::Int64)::Int64
end

function onemklDsygvx_scratchpad_size(device_queue, itype, jobz, range, uplo, n, lda, ldb, vl, vu, il, iu, abstol, ldz)
    @ccall liboneapi_support.onemklDsygvx_scratchpad_size(device_queue::syclQueue_t, itype::Int64, jobz::onemklCompz, range::onemklRangev, uplo::onemklUplo, n::Int64, lda::Int64, ldb::Int64, vl::Ptr{Cdouble}, vu::Ptr{Cdouble}, il::Int64, iu::Int64, abstol::Ptr{Cdouble}, ldz::Int64)::Int64
end

# Symmetric tridiagonal reduction (sytrd) and sytrf factorization.
function onemklSsytrd_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklSsytrd_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklDsytrd_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklDsytrd_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklSsytrf_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklSsytrf_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklDsytrf_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklDsytrf_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklCsytrf_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklCsytrf_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklZsytrf_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklZsytrf_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

# Triangular matrix inverse (trtri) and triangular solve (trtrs).
function onemklStrtri_scratchpad_size(device_queue, uplo, diag, n, lda)
    @ccall liboneapi_support.onemklStrtri_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, diag::onemklDiag, n::Int64, lda::Int64)::Int64
end

function onemklDtrtri_scratchpad_size(device_queue, uplo, diag, n, lda)
    @ccall liboneapi_support.onemklDtrtri_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, diag::onemklDiag, n::Int64, lda::Int64)::Int64
end

function onemklCtrtri_scratchpad_size(device_queue, uplo, diag, n, lda)
    @ccall liboneapi_support.onemklCtrtri_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, diag::onemklDiag, n::Int64, lda::Int64)::Int64
end

function onemklZtrtri_scratchpad_size(device_queue, uplo, diag, n, lda)
    @ccall liboneapi_support.onemklZtrtri_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, diag::onemklDiag, n::Int64, lda::Int64)::Int64
end

function onemklStrtrs_scratchpad_size(device_queue, uplo, trans, diag, n, nrhs, lda, ldb)
    @ccall liboneapi_support.onemklStrtrs_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, trans::onemklTranspose, diag::onemklDiag, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64
end

function onemklDtrtrs_scratchpad_size(device_queue, uplo, trans, diag, n, nrhs, lda, ldb)
    @ccall liboneapi_support.onemklDtrtrs_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, trans::onemklTranspose, diag::onemklDiag, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64
end

function onemklCtrtrs_scratchpad_size(device_queue, uplo, trans, diag, n, nrhs, lda, ldb)
    @ccall liboneapi_support.onemklCtrtrs_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, trans::onemklTranspose, diag::onemklDiag, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64
end

function onemklZtrtrs_scratchpad_size(device_queue, uplo, trans, diag, n, nrhs, lda, ldb)
    @ccall liboneapi_support.onemklZtrtrs_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, trans::onemklTranspose, diag::onemklDiag, n::Int64, nrhs::Int64, lda::Int64, ldb::Int64)::Int64
end

# Complex unitary factor generation (ungbr/ungqr/ungtr) and application (unm*),
# the complex counterparts of org*/orm* above.
function onemklCungbr_scratchpad_size(device_queue, vect, m, n, k, lda)
    @ccall liboneapi_support.onemklCungbr_scratchpad_size(device_queue::syclQueue_t, vect::onemklGenerate, m::Int64, n::Int64, k::Int64, lda::Int64)::Int64
end

function onemklZungbr_scratchpad_size(device_queue, vect, m, n, k, lda)
    @ccall liboneapi_support.onemklZungbr_scratchpad_size(device_queue::syclQueue_t, vect::onemklGenerate, m::Int64, n::Int64, k::Int64, lda::Int64)::Int64
end

function onemklCungqr_scratchpad_size(device_queue, m, n, k, lda)
    @ccall liboneapi_support.onemklCungqr_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, k::Int64, lda::Int64)::Int64
end

function onemklZungqr_scratchpad_size(device_queue, m, n, k, lda)
    @ccall liboneapi_support.onemklZungqr_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, k::Int64, lda::Int64)::Int64
end

function onemklCungtr_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklCungtr_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklZungtr_scratchpad_size(device_queue, uplo, n, lda)
    @ccall liboneapi_support.onemklZungtr_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64)::Int64
end

function onemklCunmqr_scratchpad_size(device_queue, side, trans, m, n, k, lda, ldc)
    @ccall liboneapi_support.onemklCunmqr_scratchpad_size(device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64, n::Int64, k::Int64, lda::Int64, ldc::Int64)::Int64
end

function onemklZunmqr_scratchpad_size(device_queue, side, trans, m, n, k, lda, ldc)
    @ccall liboneapi_support.onemklZunmqr_scratchpad_size(device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64, n::Int64, k::Int64, lda::Int64, ldc::Int64)::Int64
end

function onemklCunmrq_scratchpad_size(device_queue, side, trans, m, n, k, lda, ldc)
    @ccall liboneapi_support.onemklCunmrq_scratchpad_size(device_queue::syclQueue_t, side::onemklSide, trans::onemklTranspose, m::Int64, n::Int64, k::Int64, lda::Int64, ldc::Int64)::Int64
end

function onemklZunmrq_scratchpad_size(device_queue, side, trans, m, n, k, lda, ldc)
    @ccall liboneapi_support.onemklZunmrq_scratchpad_size(device_queue::syclQueue_t,
side::onemklSide, trans::onemklTranspose, m::Int64, n::Int64, k::Int64, lda::Int64, ldc::Int64)::Int64
end

function onemklCunmtr_scratchpad_size(device_queue, side, uplo, trans, m, n, lda, ldc)
    @ccall liboneapi_support.onemklCunmtr_scratchpad_size(device_queue::syclQueue_t, side::onemklSide, uplo::onemklUplo, trans::onemklTranspose, m::Int64, n::Int64, lda::Int64, ldc::Int64)::Int64
end

function onemklZunmtr_scratchpad_size(device_queue, side, uplo, trans, m, n, lda, ldc)
    @ccall liboneapi_support.onemklZunmtr_scratchpad_size(device_queue::syclQueue_t, side::onemklSide, uplo::onemklUplo, trans::onemklTranspose, m::Int64, n::Int64, lda::Int64, ldc::Int64)::Int64
end

# Batched (grouped) scratchpad queries: per-group problem sizes are passed as
# pointers to Int64 arrays of length `group_count`, plus `group_sizes` giving
# the number of matrices in each group.
function onemklSgeinv_batch_scratchpad_size(device_queue, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklSgeinv_batch_scratchpad_size(device_queue::syclQueue_t, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklDgeinv_batch_scratchpad_size(device_queue, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklDgeinv_batch_scratchpad_size(device_queue::syclQueue_t, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklCgeinv_batch_scratchpad_size(device_queue, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklCgeinv_batch_scratchpad_size(device_queue::syclQueue_t, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklZgeinv_batch_scratchpad_size(device_queue, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklZgeinv_batch_scratchpad_size(device_queue::syclQueue_t, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklSgels_batch_scratchpad_size(device_queue, trans, m, n, nrhs, lda, ldb, group_count, group_sizes)
    @ccall liboneapi_support.onemklSgels_batch_scratchpad_size(device_queue::syclQueue_t, trans::Ptr{onemklTranspose}, m::Ptr{Int64}, n::Ptr{Int64}, nrhs::Ptr{Int64}, lda::Ptr{Int64}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklDgels_batch_scratchpad_size(device_queue, trans, m, n, nrhs, lda, ldb, group_count, group_sizes)
    @ccall liboneapi_support.onemklDgels_batch_scratchpad_size(device_queue::syclQueue_t, trans::Ptr{onemklTranspose}, m::Ptr{Int64}, n::Ptr{Int64}, nrhs::Ptr{Int64}, lda::Ptr{Int64}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklCgels_batch_scratchpad_size(device_queue, trans, m, n, nrhs, lda, ldb, group_count, group_sizes)
    @ccall liboneapi_support.onemklCgels_batch_scratchpad_size(device_queue::syclQueue_t, trans::Ptr{onemklTranspose}, m::Ptr{Int64}, n::Ptr{Int64}, nrhs::Ptr{Int64}, lda::Ptr{Int64}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklZgels_batch_scratchpad_size(device_queue, trans, m, n, nrhs, lda, ldb, group_count, group_sizes)
    @ccall liboneapi_support.onemklZgels_batch_scratchpad_size(device_queue::syclQueue_t, trans::Ptr{onemklTranspose}, m::Ptr{Int64}, n::Ptr{Int64}, nrhs::Ptr{Int64}, lda::Ptr{Int64}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

# Strided-batch variants: a single problem size plus fixed strides between
# consecutive matrices, so all size arguments are scalar Int64.
function onemklSgels_batch_strided_scratchpad_size(device_queue, trans, m, n, nrhs, lda, stride_a, ldb, stride_b, batch_size)
    @ccall liboneapi_support.onemklSgels_batch_strided_scratchpad_size(device_queue::syclQueue_t, trans::onemklTranspose, m::Int64, n::Int64, nrhs::Int64, lda::Int64, stride_a::Int64, ldb::Int64, stride_b::Int64, batch_size::Int64)::Int64
end

function onemklDgels_batch_strided_scratchpad_size(device_queue, trans, m, n, nrhs, lda, stride_a, ldb, stride_b, batch_size)
    @ccall liboneapi_support.onemklDgels_batch_strided_scratchpad_size(device_queue::syclQueue_t, trans::onemklTranspose, m::Int64, n::Int64, nrhs::Int64, lda::Int64, stride_a::Int64, ldb::Int64, stride_b::Int64, batch_size::Int64)::Int64
end

function onemklCgels_batch_strided_scratchpad_size(device_queue, trans, m, n, nrhs, lda, stride_a, ldb, stride_b, batch_size)
    @ccall liboneapi_support.onemklCgels_batch_strided_scratchpad_size(device_queue::syclQueue_t, trans::onemklTranspose, m::Int64, n::Int64, nrhs::Int64, lda::Int64, stride_a::Int64, ldb::Int64, stride_b::Int64, batch_size::Int64)::Int64
end

function onemklZgels_batch_strided_scratchpad_size(device_queue, trans, m, n, nrhs, lda, stride_a, ldb, stride_b, batch_size)
    @ccall liboneapi_support.onemklZgels_batch_strided_scratchpad_size(device_queue::syclQueue_t, trans::onemklTranspose, m::Int64, n::Int64, nrhs::Int64, lda::Int64, stride_a::Int64, ldb::Int64, stride_b::Int64, batch_size::Int64)::Int64
end

function onemklSgeqrf_batch_scratchpad_size(device_queue, m, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklSgeqrf_batch_scratchpad_size(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklDgeqrf_batch_scratchpad_size(device_queue, m, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklDgeqrf_batch_scratchpad_size(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklCgeqrf_batch_scratchpad_size(device_queue, m, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklCgeqrf_batch_scratchpad_size(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklZgeqrf_batch_scratchpad_size(device_queue, m, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklZgeqrf_batch_scratchpad_size(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklSgeqrf_batch_strided_scratchpad_size(device_queue, m, n, lda, stride_a, stride_tau, batch_size)
    @ccall
liboneapi_support.onemklSgeqrf_batch_strided_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64, stride_a::Int64, stride_tau::Int64, batch_size::Int64)::Int64
end

# Strided-batch QR (geqrf): stride_tau is the stride between tau vectors.
function onemklDgeqrf_batch_strided_scratchpad_size(device_queue, m, n, lda, stride_a, stride_tau, batch_size)
    @ccall liboneapi_support.onemklDgeqrf_batch_strided_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64, stride_a::Int64, stride_tau::Int64, batch_size::Int64)::Int64
end

function onemklCgeqrf_batch_strided_scratchpad_size(device_queue, m, n, lda, stride_a, stride_tau, batch_size)
    @ccall liboneapi_support.onemklCgeqrf_batch_strided_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64, stride_a::Int64, stride_tau::Int64, batch_size::Int64)::Int64
end

function onemklZgeqrf_batch_strided_scratchpad_size(device_queue, m, n, lda, stride_a, stride_tau, batch_size)
    @ccall liboneapi_support.onemklZgeqrf_batch_strided_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64, stride_a::Int64, stride_tau::Int64, batch_size::Int64)::Int64
end

# Strided-batch SVD (gesvda).
function onemklSgesvda_batch_strided_scratchpad_size(device_queue, m, n, lda, stride_a, stride_s, ldu, stride_u, ldvt, stride_vt, batch_size)
    @ccall liboneapi_support.onemklSgesvda_batch_strided_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64, stride_a::Int64, stride_s::Int64, ldu::Int64, stride_u::Int64, ldvt::Int64, stride_vt::Int64, batch_size::Int64)::Int64
end

function onemklDgesvda_batch_strided_scratchpad_size(device_queue, m, n, lda, stride_a, stride_s, ldu, stride_u, ldvt, stride_vt, batch_size)
    @ccall liboneapi_support.onemklDgesvda_batch_strided_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64, stride_a::Int64, stride_s::Int64, ldu::Int64, stride_u::Int64, ldvt::Int64, stride_vt::Int64, batch_size::Int64)::Int64
end

function onemklCgesvda_batch_strided_scratchpad_size(device_queue, m, n, lda, stride_a, stride_s, ldu, stride_u, ldvt, stride_vt, batch_size)
    @ccall liboneapi_support.onemklCgesvda_batch_strided_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64, stride_a::Int64, stride_s::Int64, ldu::Int64, stride_u::Int64, ldvt::Int64, stride_vt::Int64, batch_size::Int64)::Int64
end

function onemklZgesvda_batch_strided_scratchpad_size(device_queue, m, n, lda, stride_a, stride_s, ldu, stride_u, ldvt, stride_vt, batch_size)
    @ccall liboneapi_support.onemklZgesvda_batch_strided_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64, stride_a::Int64, stride_s::Int64, ldu::Int64, stride_u::Int64, ldvt::Int64, stride_vt::Int64, batch_size::Int64)::Int64
end

# Strided-batch LU (getrf); stride_ipiv is the stride between pivot arrays.
function onemklSgetrf_batch_strided_scratchpad_size(device_queue, m, n, lda, stride_a, stride_ipiv, batch_size)
    @ccall liboneapi_support.onemklSgetrf_batch_strided_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64, stride_a::Int64, stride_ipiv::Int64, batch_size::Int64)::Int64
end

function onemklDgetrf_batch_strided_scratchpad_size(device_queue, m, n, lda, stride_a, stride_ipiv, batch_size)
    @ccall liboneapi_support.onemklDgetrf_batch_strided_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64, stride_a::Int64, stride_ipiv::Int64, batch_size::Int64)::Int64
end

function onemklCgetrf_batch_strided_scratchpad_size(device_queue, m, n, lda, stride_a, stride_ipiv, batch_size)
    @ccall liboneapi_support.onemklCgetrf_batch_strided_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64, stride_a::Int64, stride_ipiv::Int64, batch_size::Int64)::Int64
end

function onemklZgetrf_batch_strided_scratchpad_size(device_queue, m, n, lda, stride_a, stride_ipiv, batch_size)
    @ccall liboneapi_support.onemklZgetrf_batch_strided_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64, stride_a::Int64, stride_ipiv::Int64, batch_size::Int64)::Int64
end

# Grouped-batch LU (getrf): per-group sizes passed as Int64 arrays.
function onemklSgetrf_batch_scratchpad_size(device_queue, m, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklSgetrf_batch_scratchpad_size(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklDgetrf_batch_scratchpad_size(device_queue, m, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklDgetrf_batch_scratchpad_size(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklCgetrf_batch_scratchpad_size(device_queue, m, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklCgetrf_batch_scratchpad_size(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklZgetrf_batch_scratchpad_size(device_queue, m, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklZgetrf_batch_scratchpad_size(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

# "np" variants: no pivoting, hence no stride_ipiv argument.
function onemklSgetrfnp_batch_strided_scratchpad_size(device_queue, m, n, lda, stride_a, batch_size)
    @ccall liboneapi_support.onemklSgetrfnp_batch_strided_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64, stride_a::Int64, batch_size::Int64)::Int64
end

function onemklDgetrfnp_batch_strided_scratchpad_size(device_queue, m, n, lda, stride_a, batch_size)
    @ccall liboneapi_support.onemklDgetrfnp_batch_strided_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64, stride_a::Int64, batch_size::Int64)::Int64
end

function onemklCgetrfnp_batch_strided_scratchpad_size(device_queue, m, n, lda, stride_a, batch_size)
    @ccall liboneapi_support.onemklCgetrfnp_batch_strided_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64, stride_a::Int64, batch_size::Int64)::Int64
end

function onemklZgetrfnp_batch_strided_scratchpad_size(device_queue, m, n, lda, stride_a, batch_size)
    @ccall
liboneapi_support.onemklZgetrfnp_batch_strided_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, lda::Int64, stride_a::Int64, batch_size::Int64)::Int64
end

# Grouped-batch LU without pivoting (getrfnp).
function onemklSgetrfnp_batch_scratchpad_size(device_queue, m, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklSgetrfnp_batch_scratchpad_size(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklDgetrfnp_batch_scratchpad_size(device_queue, m, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklDgetrfnp_batch_scratchpad_size(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklCgetrfnp_batch_scratchpad_size(device_queue, m, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklCgetrfnp_batch_scratchpad_size(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklZgetrfnp_batch_scratchpad_size(device_queue, m, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklZgetrfnp_batch_scratchpad_size(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

# Batched matrix inverse from LU factors (getri), grouped and strided.
function onemklSgetri_batch_scratchpad_size(device_queue, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklSgetri_batch_scratchpad_size(device_queue::syclQueue_t, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklDgetri_batch_scratchpad_size(device_queue, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklDgetri_batch_scratchpad_size(device_queue::syclQueue_t, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklCgetri_batch_scratchpad_size(device_queue, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklCgetri_batch_scratchpad_size(device_queue::syclQueue_t, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklZgetri_batch_scratchpad_size(device_queue, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklZgetri_batch_scratchpad_size(device_queue::syclQueue_t, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklSgetri_batch_strided_scratchpad_size(device_queue, n, lda, stride_a, stride_ipiv, batch_size)
    @ccall liboneapi_support.onemklSgetri_batch_strided_scratchpad_size(device_queue::syclQueue_t, n::Int64, lda::Int64, stride_a::Int64, stride_ipiv::Int64, batch_size::Int64)::Int64
end

function onemklDgetri_batch_strided_scratchpad_size(device_queue, n, lda, stride_a, stride_ipiv, batch_size)
    @ccall liboneapi_support.onemklDgetri_batch_strided_scratchpad_size(device_queue::syclQueue_t, n::Int64, lda::Int64, stride_a::Int64, stride_ipiv::Int64, batch_size::Int64)::Int64
end

function onemklCgetri_batch_strided_scratchpad_size(device_queue, n, lda, stride_a, stride_ipiv, batch_size)
    @ccall liboneapi_support.onemklCgetri_batch_strided_scratchpad_size(device_queue::syclQueue_t, n::Int64, lda::Int64, stride_a::Int64, stride_ipiv::Int64, batch_size::Int64)::Int64
end

function onemklZgetri_batch_strided_scratchpad_size(device_queue, n, lda, stride_a, stride_ipiv, batch_size)
    @ccall liboneapi_support.onemklZgetri_batch_strided_scratchpad_size(device_queue::syclQueue_t, n::Int64, lda::Int64, stride_a::Int64, stride_ipiv::Int64, batch_size::Int64)::Int64
end

# Batched linear solve from LU factors (getrs), grouped and strided.
function onemklSgetrs_batch_scratchpad_size(device_queue, trans, n, nrhs, lda, ldb, group_count, group_sizes)
    @ccall liboneapi_support.onemklSgetrs_batch_scratchpad_size(device_queue::syclQueue_t, trans::Ptr{onemklTranspose}, n::Ptr{Int64}, nrhs::Ptr{Int64}, lda::Ptr{Int64}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklDgetrs_batch_scratchpad_size(device_queue, trans, n, nrhs, lda, ldb, group_count, group_sizes)
    @ccall liboneapi_support.onemklDgetrs_batch_scratchpad_size(device_queue::syclQueue_t, trans::Ptr{onemklTranspose}, n::Ptr{Int64}, nrhs::Ptr{Int64}, lda::Ptr{Int64}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklCgetrs_batch_scratchpad_size(device_queue, trans, n, nrhs, lda, ldb, group_count, group_sizes)
    @ccall liboneapi_support.onemklCgetrs_batch_scratchpad_size(device_queue::syclQueue_t, trans::Ptr{onemklTranspose}, n::Ptr{Int64}, nrhs::Ptr{Int64}, lda::Ptr{Int64}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklZgetrs_batch_scratchpad_size(device_queue, trans, n, nrhs, lda, ldb, group_count, group_sizes)
    @ccall liboneapi_support.onemklZgetrs_batch_scratchpad_size(device_queue::syclQueue_t, trans::Ptr{onemklTranspose}, n::Ptr{Int64}, nrhs::Ptr{Int64}, lda::Ptr{Int64}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklSgetrs_batch_strided_scratchpad_size(device_queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size)
    @ccall liboneapi_support.onemklSgetrs_batch_strided_scratchpad_size(device_queue::syclQueue_t, trans::onemklTranspose, n::Int64, nrhs::Int64, lda::Int64, stride_a::Int64, stride_ipiv::Int64, ldb::Int64, stride_b::Int64, batch_size::Int64)::Int64
end

function onemklDgetrs_batch_strided_scratchpad_size(device_queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size)
    @ccall liboneapi_support.onemklDgetrs_batch_strided_scratchpad_size(device_queue::syclQueue_t, trans::onemklTranspose, n::Int64, nrhs::Int64, lda::Int64, stride_a::Int64, stride_ipiv::Int64, ldb::Int64, stride_b::Int64, batch_size::Int64)::Int64
end

function onemklCgetrs_batch_strided_scratchpad_size(device_queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size)
    @ccall
liboneapi_support.onemklCgetrs_batch_strided_scratchpad_size(device_queue::syclQueue_t, trans::onemklTranspose, n::Int64, nrhs::Int64, lda::Int64, stride_a::Int64, stride_ipiv::Int64, ldb::Int64, stride_b::Int64, batch_size::Int64)::Int64
end

function onemklZgetrs_batch_strided_scratchpad_size(device_queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size)
    @ccall liboneapi_support.onemklZgetrs_batch_strided_scratchpad_size(device_queue::syclQueue_t, trans::onemklTranspose, n::Int64, nrhs::Int64, lda::Int64, stride_a::Int64, stride_ipiv::Int64, ldb::Int64, stride_b::Int64, batch_size::Int64)::Int64
end

# Strided-batch solve without pivoting (getrsnp): no stride_ipiv argument.
function onemklSgetrsnp_batch_strided_scratchpad_size(device_queue, trans, n, nrhs, lda, stride_a, ldb, stride_b, batch_size)
    @ccall liboneapi_support.onemklSgetrsnp_batch_strided_scratchpad_size(device_queue::syclQueue_t, trans::onemklTranspose, n::Int64, nrhs::Int64, lda::Int64, stride_a::Int64, ldb::Int64, stride_b::Int64, batch_size::Int64)::Int64
end

function onemklDgetrsnp_batch_strided_scratchpad_size(device_queue, trans, n, nrhs, lda, stride_a, ldb, stride_b, batch_size)
    @ccall liboneapi_support.onemklDgetrsnp_batch_strided_scratchpad_size(device_queue::syclQueue_t, trans::onemklTranspose, n::Int64, nrhs::Int64, lda::Int64, stride_a::Int64, ldb::Int64, stride_b::Int64, batch_size::Int64)::Int64
end

function onemklCgetrsnp_batch_strided_scratchpad_size(device_queue, trans, n, nrhs, lda, stride_a, ldb, stride_b, batch_size)
    @ccall liboneapi_support.onemklCgetrsnp_batch_strided_scratchpad_size(device_queue::syclQueue_t, trans::onemklTranspose, n::Int64, nrhs::Int64, lda::Int64, stride_a::Int64, ldb::Int64, stride_b::Int64, batch_size::Int64)::Int64
end

function onemklZgetrsnp_batch_strided_scratchpad_size(device_queue, trans, n, nrhs, lda, stride_a, ldb, stride_b, batch_size)
    @ccall liboneapi_support.onemklZgetrsnp_batch_strided_scratchpad_size(device_queue::syclQueue_t, trans::onemklTranspose, n::Int64, nrhs::Int64, lda::Int64, stride_a::Int64, ldb::Int64, stride_b::Int64, batch_size::Int64)::Int64
end

# Batched orthogonal factor generation (orgqr), grouped and strided.
function onemklSorgqr_batch_scratchpad_size(device_queue, m, n, k, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklSorgqr_batch_scratchpad_size(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, k::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklDorgqr_batch_scratchpad_size(device_queue, m, n, k, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklDorgqr_batch_scratchpad_size(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, k::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklSorgqr_batch_strided_scratchpad_size(device_queue, m, n, k, lda, stride_a, stride_tau, batch_size)
    @ccall liboneapi_support.onemklSorgqr_batch_strided_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, k::Int64, lda::Int64, stride_a::Int64, stride_tau::Int64, batch_size::Int64)::Int64
end

function onemklDorgqr_batch_strided_scratchpad_size(device_queue, m, n, k, lda, stride_a, stride_tau, batch_size)
    @ccall liboneapi_support.onemklDorgqr_batch_strided_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, k::Int64, lda::Int64, stride_a::Int64, stride_tau::Int64, batch_size::Int64)::Int64
end

# Grouped-batch ormqr: enum parameters are also per-group arrays here.
function onemklSormqr_batch_scratchpad_size(device_queue, side, trans, m, n, k, lda, ldc, group_count, group_sizes)
    @ccall liboneapi_support.onemklSormqr_batch_scratchpad_size(device_queue::syclQueue_t, side::Ptr{onemklSide}, trans::Ptr{onemklTranspose}, m::Ptr{Int64}, n::Ptr{Int64}, k::Ptr{Int64}, lda::Ptr{Int64}, ldc::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklDormqr_batch_scratchpad_size(device_queue, side, trans, m, n, k, lda, ldc, group_count, group_sizes)
    @ccall liboneapi_support.onemklDormqr_batch_scratchpad_size(device_queue::syclQueue_t, side::Ptr{onemklSide}, trans::Ptr{onemklTranspose}, m::Ptr{Int64}, n::Ptr{Int64}, k::Ptr{Int64}, lda::Ptr{Int64}, ldc::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

# Batched Cholesky (potrf): grouped form takes uplo per group (Ptr), the
# strided form a single scalar uplo.
function onemklSpotrf_batch_scratchpad_size(device_queue, uplo, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklSpotrf_batch_scratchpad_size(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklDpotrf_batch_scratchpad_size(device_queue, uplo, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklDpotrf_batch_scratchpad_size(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklCpotrf_batch_scratchpad_size(device_queue, uplo, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklCpotrf_batch_scratchpad_size(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklZpotrf_batch_scratchpad_size(device_queue, uplo, n, lda, group_count, group_sizes)
    @ccall liboneapi_support.onemklZpotrf_batch_scratchpad_size(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, n::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64
end

function onemklSpotrf_batch_strided_scratchpad_size(device_queue, uplo, n, lda, stride_a, batch_size)
    @ccall liboneapi_support.onemklSpotrf_batch_strided_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64, stride_a::Int64, batch_size::Int64)::Int64
end

function onemklDpotrf_batch_strided_scratchpad_size(device_queue, uplo, n, lda, stride_a, batch_size)
    @ccall liboneapi_support.onemklDpotrf_batch_strided_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64, stride_a::Int64, batch_size::Int64)::Int64
end

function onemklCpotrf_batch_strided_scratchpad_size(device_queue, uplo, n, lda, stride_a, batch_size)
    @ccall
liboneapi_support.onemklCpotrf_batch_strided_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64, stride_a::Int64, batch_size::Int64)::Int64 end function onemklZpotrf_batch_strided_scratchpad_size(device_queue, uplo, n, lda, stride_a, batch_size) @ccall liboneapi_support.onemklZpotrf_batch_strided_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, lda::Int64, stride_a::Int64, batch_size::Int64)::Int64 end function onemklSpotrs_batch_scratchpad_size(device_queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes) @ccall liboneapi_support.onemklSpotrs_batch_scratchpad_size(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, n::Ptr{Int64}, nrhs::Ptr{Int64}, lda::Ptr{Int64}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64 end function onemklDpotrs_batch_scratchpad_size(device_queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes) @ccall liboneapi_support.onemklDpotrs_batch_scratchpad_size(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, n::Ptr{Int64}, nrhs::Ptr{Int64}, lda::Ptr{Int64}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64 end function onemklCpotrs_batch_scratchpad_size(device_queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes) @ccall liboneapi_support.onemklCpotrs_batch_scratchpad_size(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, n::Ptr{Int64}, nrhs::Ptr{Int64}, lda::Ptr{Int64}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64 end function onemklZpotrs_batch_scratchpad_size(device_queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes) @ccall liboneapi_support.onemklZpotrs_batch_scratchpad_size(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, n::Ptr{Int64}, nrhs::Ptr{Int64}, lda::Ptr{Int64}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64 end function onemklSpotrs_batch_strided_scratchpad_size(device_queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size) @ccall 
liboneapi_support.onemklSpotrs_batch_strided_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, nrhs::Int64, lda::Int64, stride_a::Int64, ldb::Int64, stride_b::Int64, batch_size::Int64)::Int64 end function onemklDpotrs_batch_strided_scratchpad_size(device_queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size) @ccall liboneapi_support.onemklDpotrs_batch_strided_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, nrhs::Int64, lda::Int64, stride_a::Int64, ldb::Int64, stride_b::Int64, batch_size::Int64)::Int64 end function onemklCpotrs_batch_strided_scratchpad_size(device_queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size) @ccall liboneapi_support.onemklCpotrs_batch_strided_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, nrhs::Int64, lda::Int64, stride_a::Int64, ldb::Int64, stride_b::Int64, batch_size::Int64)::Int64 end function onemklZpotrs_batch_strided_scratchpad_size(device_queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size) @ccall liboneapi_support.onemklZpotrs_batch_strided_scratchpad_size(device_queue::syclQueue_t, uplo::onemklUplo, n::Int64, nrhs::Int64, lda::Int64, stride_a::Int64, ldb::Int64, stride_b::Int64, batch_size::Int64)::Int64 end function onemklStrtrs_batch_scratchpad_size(device_queue, uplo, trans, diag, n, nrhs, lda, ldb, group_count, group_sizes) @ccall liboneapi_support.onemklStrtrs_batch_scratchpad_size(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, trans::Ptr{onemklTranspose}, diag::Ptr{onemklDiag}, n::Ptr{Int64}, nrhs::Ptr{Int64}, lda::Ptr{Int64}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64 end function onemklDtrtrs_batch_scratchpad_size(device_queue, uplo, trans, diag, n, nrhs, lda, ldb, group_count, group_sizes) @ccall liboneapi_support.onemklDtrtrs_batch_scratchpad_size(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, trans::Ptr{onemklTranspose}, diag::Ptr{onemklDiag}, n::Ptr{Int64}, nrhs::Ptr{Int64}, lda::Ptr{Int64}, 
ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64 end function onemklCtrtrs_batch_scratchpad_size(device_queue, uplo, trans, diag, n, nrhs, lda, ldb, group_count, group_sizes) @ccall liboneapi_support.onemklCtrtrs_batch_scratchpad_size(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, trans::Ptr{onemklTranspose}, diag::Ptr{onemklDiag}, n::Ptr{Int64}, nrhs::Ptr{Int64}, lda::Ptr{Int64}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64 end function onemklZtrtrs_batch_scratchpad_size(device_queue, uplo, trans, diag, n, nrhs, lda, ldb, group_count, group_sizes) @ccall liboneapi_support.onemklZtrtrs_batch_scratchpad_size(device_queue::syclQueue_t, uplo::Ptr{onemklUplo}, trans::Ptr{onemklTranspose}, diag::Ptr{onemklDiag}, n::Ptr{Int64}, nrhs::Ptr{Int64}, lda::Ptr{Int64}, ldb::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64 end function onemklCungqr_batch_scratchpad_size(device_queue, m, n, k, lda, group_count, group_sizes) @ccall liboneapi_support.onemklCungqr_batch_scratchpad_size(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, k::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64 end function onemklZungqr_batch_scratchpad_size(device_queue, m, n, k, lda, group_count, group_sizes) @ccall liboneapi_support.onemklZungqr_batch_scratchpad_size(device_queue::syclQueue_t, m::Ptr{Int64}, n::Ptr{Int64}, k::Ptr{Int64}, lda::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64 end function onemklCungqr_batch_strided_scratchpad_size(device_queue, m, n, k, lda, stride_a, stride_tau, batch_size) @ccall liboneapi_support.onemklCungqr_batch_strided_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, k::Int64, lda::Int64, stride_a::Int64, stride_tau::Int64, batch_size::Int64)::Int64 end function onemklZungqr_batch_strided_scratchpad_size(device_queue, m, n, k, lda, stride_a, stride_tau, batch_size) @ccall 
liboneapi_support.onemklZungqr_batch_strided_scratchpad_size(device_queue::syclQueue_t, m::Int64, n::Int64, k::Int64, lda::Int64, stride_a::Int64, stride_tau::Int64, batch_size::Int64)::Int64 end function onemklCunmqr_batch_scratchpad_size(device_queue, side, trans, m, n, k, lda, ldc, group_count, group_sizes) @ccall liboneapi_support.onemklCunmqr_batch_scratchpad_size(device_queue::syclQueue_t, side::Ptr{onemklSide}, trans::Ptr{onemklTranspose}, m::Ptr{Int64}, n::Ptr{Int64}, k::Ptr{Int64}, lda::Ptr{Int64}, ldc::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64 end function onemklZunmqr_batch_scratchpad_size(device_queue, side, trans, m, n, k, lda, ldc, group_count, group_sizes) @ccall liboneapi_support.onemklZunmqr_batch_scratchpad_size(device_queue::syclQueue_t, side::Ptr{onemklSide}, trans::Ptr{onemklTranspose}, m::Ptr{Int64}, n::Ptr{Int64}, k::Ptr{Int64}, lda::Ptr{Int64}, ldc::Ptr{Int64}, group_count::Int64, group_sizes::Ptr{Int64})::Int64 end function onemklXsparse_init_matrix_handle(p_spMat) @ccall liboneapi_support.onemklXsparse_init_matrix_handle(p_spMat::Ptr{matrix_handle_t})::Cint end function onemklXsparse_release_matrix_handle(device_queue, p_spMat) @ccall liboneapi_support.onemklXsparse_release_matrix_handle(device_queue::syclQueue_t, p_spMat::Ptr{matrix_handle_t})::Cint end function onemklSsparse_set_csr_data(device_queue, spMat, nrows, ncols, index, row_ptr, col_ind, values) @ccall liboneapi_support.onemklSsparse_set_csr_data(device_queue::syclQueue_t, spMat::matrix_handle_t, nrows::Int32, ncols::Int32, index::onemklIndex, row_ptr::ZePtr{Int32}, col_ind::ZePtr{Int32}, values::ZePtr{Cfloat})::Cint end function onemklSsparse_set_csr_data_64(device_queue, spMat, nrows, ncols, index, row_ptr, col_ind, values) @ccall liboneapi_support.onemklSsparse_set_csr_data_64(device_queue::syclQueue_t, spMat::matrix_handle_t, nrows::Int64, ncols::Int64, index::onemklIndex, row_ptr::ZePtr{Int64}, col_ind::ZePtr{Int64}, values::ZePtr{Cfloat})::Cint end 
function onemklDsparse_set_csr_data(device_queue, spMat, nrows, ncols, index, row_ptr, col_ind, values) @ccall liboneapi_support.onemklDsparse_set_csr_data(device_queue::syclQueue_t, spMat::matrix_handle_t, nrows::Int32, ncols::Int32, index::onemklIndex, row_ptr::ZePtr{Int32}, col_ind::ZePtr{Int32}, values::ZePtr{Cdouble})::Cint end function onemklDsparse_set_csr_data_64(device_queue, spMat, nrows, ncols, index, row_ptr, col_ind, values) @ccall liboneapi_support.onemklDsparse_set_csr_data_64(device_queue::syclQueue_t, spMat::matrix_handle_t, nrows::Int64, ncols::Int64, index::onemklIndex, row_ptr::ZePtr{Int64}, col_ind::ZePtr{Int64}, values::ZePtr{Cdouble})::Cint end function onemklCsparse_set_csr_data(device_queue, spMat, nrows, ncols, index, row_ptr, col_ind, values) @ccall liboneapi_support.onemklCsparse_set_csr_data(device_queue::syclQueue_t, spMat::matrix_handle_t, nrows::Int32, ncols::Int32, index::onemklIndex, row_ptr::ZePtr{Int32}, col_ind::ZePtr{Int32}, values::ZePtr{ComplexF32})::Cint end function onemklCsparse_set_csr_data_64(device_queue, spMat, nrows, ncols, index, row_ptr, col_ind, values) @ccall liboneapi_support.onemklCsparse_set_csr_data_64(device_queue::syclQueue_t, spMat::matrix_handle_t, nrows::Int64, ncols::Int64, index::onemklIndex, row_ptr::ZePtr{Int64}, col_ind::ZePtr{Int64}, values::ZePtr{ComplexF32})::Cint end function onemklZsparse_set_csr_data(device_queue, spMat, nrows, ncols, index, row_ptr, col_ind, values) @ccall liboneapi_support.onemklZsparse_set_csr_data(device_queue::syclQueue_t, spMat::matrix_handle_t, nrows::Int32, ncols::Int32, index::onemklIndex, row_ptr::ZePtr{Int32}, col_ind::ZePtr{Int32}, values::ZePtr{ComplexF64})::Cint end function onemklZsparse_set_csr_data_64(device_queue, spMat, nrows, ncols, index, row_ptr, col_ind, values) @ccall liboneapi_support.onemklZsparse_set_csr_data_64(device_queue::syclQueue_t, spMat::matrix_handle_t, nrows::Int64, ncols::Int64, index::onemklIndex, row_ptr::ZePtr{Int64}, 
col_ind::ZePtr{Int64}, values::ZePtr{ComplexF64})::Cint end function onemklSsparse_set_coo_data(device_queue, spMat, nrows, ncols, nnz, index, row_ind, col_ind, values) @ccall liboneapi_support.onemklSsparse_set_coo_data(device_queue::syclQueue_t, spMat::matrix_handle_t, nrows::Int32, ncols::Int32, nnz::Int32, index::onemklIndex, row_ind::ZePtr{Int32}, col_ind::ZePtr{Int32}, values::ZePtr{Cfloat})::Cint end function onemklSsparse_set_coo_data_64(device_queue, spMat, nrows, ncols, nnz, index, row_ind, col_ind, values) @ccall liboneapi_support.onemklSsparse_set_coo_data_64(device_queue::syclQueue_t, spMat::matrix_handle_t, nrows::Int64, ncols::Int64, nnz::Int64, index::onemklIndex, row_ind::ZePtr{Int64}, col_ind::ZePtr{Int64}, values::ZePtr{Cfloat})::Cint end function onemklDsparse_set_coo_data(device_queue, spMat, nrows, ncols, nnz, index, row_ind, col_ind, values) @ccall liboneapi_support.onemklDsparse_set_coo_data(device_queue::syclQueue_t, spMat::matrix_handle_t, nrows::Int32, ncols::Int32, nnz::Int32, index::onemklIndex, row_ind::ZePtr{Int32}, col_ind::ZePtr{Int32}, values::ZePtr{Cdouble})::Cint end function onemklDsparse_set_coo_data_64(device_queue, spMat, nrows, ncols, nnz, index, row_ind, col_ind, values) @ccall liboneapi_support.onemklDsparse_set_coo_data_64(device_queue::syclQueue_t, spMat::matrix_handle_t, nrows::Int64, ncols::Int64, nnz::Int64, index::onemklIndex, row_ind::ZePtr{Int64}, col_ind::ZePtr{Int64}, values::ZePtr{Cdouble})::Cint end function onemklCsparse_set_coo_data(device_queue, spMat, nrows, ncols, nnz, index, row_ind, col_ind, values) @ccall liboneapi_support.onemklCsparse_set_coo_data(device_queue::syclQueue_t, spMat::matrix_handle_t, nrows::Int32, ncols::Int32, nnz::Int32, index::onemklIndex, row_ind::ZePtr{Int32}, col_ind::ZePtr{Int32}, values::ZePtr{ComplexF32})::Cint end function onemklCsparse_set_coo_data_64(device_queue, spMat, nrows, ncols, nnz, index, row_ind, col_ind, values) @ccall 
liboneapi_support.onemklCsparse_set_coo_data_64(device_queue::syclQueue_t, spMat::matrix_handle_t, nrows::Int64, ncols::Int64, nnz::Int64, index::onemklIndex, row_ind::ZePtr{Int64}, col_ind::ZePtr{Int64}, values::ZePtr{ComplexF32})::Cint end function onemklZsparse_set_coo_data(device_queue, spMat, nrows, ncols, nnz, index, row_ind, col_ind, values) @ccall liboneapi_support.onemklZsparse_set_coo_data(device_queue::syclQueue_t, spMat::matrix_handle_t, nrows::Int32, ncols::Int32, nnz::Int32, index::onemklIndex, row_ind::ZePtr{Int32}, col_ind::ZePtr{Int32}, values::ZePtr{ComplexF64})::Cint end function onemklZsparse_set_coo_data_64(device_queue, spMat, nrows, ncols, nnz, index, row_ind, col_ind, values) @ccall liboneapi_support.onemklZsparse_set_coo_data_64(device_queue::syclQueue_t, spMat::matrix_handle_t, nrows::Int64, ncols::Int64, nnz::Int64, index::onemklIndex, row_ind::ZePtr{Int64}, col_ind::ZePtr{Int64}, values::ZePtr{ComplexF64})::Cint end function onemklXsparse_init_matmat_descr(p_desc) @ccall liboneapi_support.onemklXsparse_init_matmat_descr(p_desc::Ptr{matmat_descr_t})::Cint end function onemklXsparse_release_matmat_descr(p_desc) @ccall liboneapi_support.onemklXsparse_release_matmat_descr(p_desc::Ptr{matmat_descr_t})::Cint end function onemklXsparse_init_omatconvert_descr(device_queue, p_descr) @ccall liboneapi_support.onemklXsparse_init_omatconvert_descr(device_queue::syclQueue_t, p_descr::Ptr{omatconvert_descr_t})::Cint end function onemklXsparse_release_omatconvert_descr(device_queue, descr) @ccall liboneapi_support.onemklXsparse_release_omatconvert_descr(device_queue::syclQueue_t, descr::omatconvert_descr_t)::Cint end function onemklXsparse_init_omatadd_descr(device_queue, p_omatadd_desc) @ccall liboneapi_support.onemklXsparse_init_omatadd_descr(device_queue::syclQueue_t, p_omatadd_desc::Ptr{omatadd_descr_t})::Cint end function onemklXsparse_release_omatadd_descr(device_queue, omatadd_desc) @ccall 
liboneapi_support.onemklXsparse_release_omatadd_descr(device_queue::syclQueue_t, omatadd_desc::omatadd_descr_t)::Cint end function onemklXsparse_omatcopy(device_queue, transpose_val, spMat_in, spMat_out) @ccall liboneapi_support.onemklXsparse_omatcopy(device_queue::syclQueue_t, transpose_val::onemklTranspose, spMat_in::matrix_handle_t, spMat_out::matrix_handle_t)::Cint end function onemklXsparse_sort_matrix(device_queue, spMat) @ccall liboneapi_support.onemklXsparse_sort_matrix(device_queue::syclQueue_t, spMat::matrix_handle_t)::Cint end function onemklSsparse_update_diagonal_values(device_queue, spMat, length, new_diag_values) @ccall liboneapi_support.onemklSsparse_update_diagonal_values(device_queue::syclQueue_t, spMat::matrix_handle_t, length::Int64, new_diag_values::ZePtr{Cfloat})::Cint end function onemklDsparse_update_diagonal_values(device_queue, spMat, length, new_diag_values) @ccall liboneapi_support.onemklDsparse_update_diagonal_values(device_queue::syclQueue_t, spMat::matrix_handle_t, length::Int64, new_diag_values::ZePtr{Cdouble})::Cint end function onemklCsparse_update_diagonal_values(device_queue, spMat, length, new_diag_values) @ccall liboneapi_support.onemklCsparse_update_diagonal_values(device_queue::syclQueue_t, spMat::matrix_handle_t, length::Int64, new_diag_values::ZePtr{ComplexF32})::Cint end function onemklZsparse_update_diagonal_values(device_queue, spMat, length, new_diag_values) @ccall liboneapi_support.onemklZsparse_update_diagonal_values(device_queue::syclQueue_t, spMat::matrix_handle_t, length::Int64, new_diag_values::ZePtr{ComplexF64})::Cint end function onemklXsparse_optimize_gemv(device_queue, opA, A) @ccall liboneapi_support.onemklXsparse_optimize_gemv(device_queue::syclQueue_t, opA::onemklTranspose, A::matrix_handle_t)::Cint end function onemklXsparse_optimize_trmv(device_queue, uplo_val, opA, diag_val, A) @ccall liboneapi_support.onemklXsparse_optimize_trmv(device_queue::syclQueue_t, uplo_val::onemklUplo, opA::onemklTranspose, 
diag_val::onemklDiag, A::matrix_handle_t)::Cint end function onemklXsparse_optimize_trsv(device_queue, uplo_val, opA, diag_val, A) @ccall liboneapi_support.onemklXsparse_optimize_trsv(device_queue::syclQueue_t, uplo_val::onemklUplo, opA::onemklTranspose, diag_val::onemklDiag, A::matrix_handle_t)::Cint end function onemklXsparse_optimize_gemm(device_queue, opA, A) @ccall liboneapi_support.onemklXsparse_optimize_gemm(device_queue::syclQueue_t, opA::onemklTranspose, A::matrix_handle_t)::Cint end function onemklXsparse_optimize_gemm_advanced(device_queue, layout_val, opA, opB, A, columns) @ccall liboneapi_support.onemklXsparse_optimize_gemm_advanced(device_queue::syclQueue_t, layout_val::onemklLayout, opA::onemklTranspose, opB::onemklTranspose, A::matrix_handle_t, columns::Int64)::Cint end function onemklXsparse_optimize_trsm(device_queue, uplo_val, opA, diag_val, A) @ccall liboneapi_support.onemklXsparse_optimize_trsm(device_queue::syclQueue_t, uplo_val::onemklUplo, opA::onemklTranspose, diag_val::onemklDiag, A::matrix_handle_t)::Cint end function onemklXsparse_optimize_trsm_advanced(device_queue, layout_val, uplo_val, opA, diag_val, A, columns) @ccall liboneapi_support.onemklXsparse_optimize_trsm_advanced(device_queue::syclQueue_t, layout_val::onemklLayout, uplo_val::onemklUplo, opA::onemklTranspose, diag_val::onemklDiag, A::matrix_handle_t, columns::Int64)::Cint end function onemklSsparse_gemv(device_queue, opA, alpha, A, x, beta, y) @ccall liboneapi_support.onemklSsparse_gemv(device_queue::syclQueue_t, opA::onemklTranspose, alpha::Ref{Cfloat}, A::matrix_handle_t, x::ZePtr{Cfloat}, beta::Ref{Cfloat}, y::ZePtr{Cfloat})::Cint end function onemklDsparse_gemv(device_queue, opA, alpha, A, x, beta, y) @ccall liboneapi_support.onemklDsparse_gemv(device_queue::syclQueue_t, opA::onemklTranspose, alpha::Ref{Cdouble}, A::matrix_handle_t, x::ZePtr{Cdouble}, beta::Ref{Cdouble}, y::ZePtr{Cdouble})::Cint end function onemklCsparse_gemv(device_queue, opA, alpha, A, x, beta, y) 
@ccall liboneapi_support.onemklCsparse_gemv(device_queue::syclQueue_t, opA::onemklTranspose, alpha::Ref{ComplexF32}, A::matrix_handle_t, x::ZePtr{ComplexF32}, beta::Ref{ComplexF32}, y::ZePtr{ComplexF32})::Cint end function onemklZsparse_gemv(device_queue, opA, alpha, A, x, beta, y) @ccall liboneapi_support.onemklZsparse_gemv(device_queue::syclQueue_t, opA::onemklTranspose, alpha::Ref{ComplexF64}, A::matrix_handle_t, x::ZePtr{ComplexF64}, beta::Ref{ComplexF64}, y::ZePtr{ComplexF64})::Cint end function onemklSsparse_gemvdot(device_queue, opA, alpha, A, x, beta, y, d) @ccall liboneapi_support.onemklSsparse_gemvdot(device_queue::syclQueue_t, opA::onemklTranspose, alpha::Ref{Cfloat}, A::matrix_handle_t, x::ZePtr{Cfloat}, beta::Ref{Cfloat}, y::ZePtr{Cfloat}, d::ZePtr{Cfloat})::Cint end function onemklDsparse_gemvdot(device_queue, opA, alpha, A, x, beta, y, d) @ccall liboneapi_support.onemklDsparse_gemvdot(device_queue::syclQueue_t, opA::onemklTranspose, alpha::Ref{Cdouble}, A::matrix_handle_t, x::ZePtr{Cdouble}, beta::Ref{Cdouble}, y::ZePtr{Cdouble}, d::ZePtr{Cdouble})::Cint end function onemklCsparse_gemvdot(device_queue, opA, alpha, A, x, beta, y, d) @ccall liboneapi_support.onemklCsparse_gemvdot(device_queue::syclQueue_t, opA::onemklTranspose, alpha::Ref{ComplexF32}, A::matrix_handle_t, x::ZePtr{ComplexF32}, beta::Ref{ComplexF32}, y::ZePtr{ComplexF32}, d::ZePtr{ComplexF32})::Cint end function onemklZsparse_gemvdot(device_queue, opA, alpha, A, x, beta, y, d) @ccall liboneapi_support.onemklZsparse_gemvdot(device_queue::syclQueue_t, opA::onemklTranspose, alpha::Ref{ComplexF64}, A::matrix_handle_t, x::ZePtr{ComplexF64}, beta::Ref{ComplexF64}, y::ZePtr{ComplexF64}, d::ZePtr{ComplexF64})::Cint end function onemklSsparse_symv(device_queue, uplo_val, alpha, A, x, beta, y) @ccall liboneapi_support.onemklSsparse_symv(device_queue::syclQueue_t, uplo_val::onemklUplo, alpha::Ref{Cfloat}, A::matrix_handle_t, x::ZePtr{Cfloat}, beta::Ref{Cfloat}, y::ZePtr{Cfloat})::Cint end function 
# -- Generated @ccall wrappers for oneMKL sparse BLAS (symv/trmv/trsv/gemm/trsm/matmatd) --
# NOTE(review): this span begins mid-definition — the leading `function` keyword for
# onemklDsparse_symv is on the preceding (unseen) line — and ends with a bare
# `function` keyword whose definition (onemklCsparse_matmatd) continues on the next line.
onemklDsparse_symv(device_queue, uplo_val, alpha, A, x, beta, y)
    # sparse symmetric matrix-vector product, Float64 elements
    @ccall liboneapi_support.onemklDsparse_symv(device_queue::syclQueue_t, uplo_val::onemklUplo, alpha::Ref{Cdouble}, A::matrix_handle_t, x::ZePtr{Cdouble}, beta::Ref{Cdouble}, y::ZePtr{Cdouble})::Cint
end

function onemklCsparse_symv(device_queue, uplo_val, alpha, A, x, beta, y)
    # sparse symmetric matrix-vector product, ComplexF32 elements
    @ccall liboneapi_support.onemklCsparse_symv(device_queue::syclQueue_t, uplo_val::onemklUplo, alpha::Ref{ComplexF32}, A::matrix_handle_t, x::ZePtr{ComplexF32}, beta::Ref{ComplexF32}, y::ZePtr{ComplexF32})::Cint
end

function onemklZsparse_symv(device_queue, uplo_val, alpha, A, x, beta, y)
    # sparse symmetric matrix-vector product, ComplexF64 elements
    @ccall liboneapi_support.onemklZsparse_symv(device_queue::syclQueue_t, uplo_val::onemklUplo, alpha::Ref{ComplexF64}, A::matrix_handle_t, x::ZePtr{ComplexF64}, beta::Ref{ComplexF64}, y::ZePtr{ComplexF64})::Cint
end

# sparse triangular matrix-vector product, one wrapper per element type
function onemklSsparse_trmv(device_queue, uplo_val, opA, diag_val, alpha, A, x, beta, y)
    @ccall liboneapi_support.onemklSsparse_trmv(device_queue::syclQueue_t, uplo_val::onemklUplo, opA::onemklTranspose, diag_val::onemklDiag, alpha::Ref{Cfloat}, A::matrix_handle_t, x::ZePtr{Cfloat}, beta::Ref{Cfloat}, y::ZePtr{Cfloat})::Cint
end

function onemklDsparse_trmv(device_queue, uplo_val, opA, diag_val, alpha, A, x, beta, y)
    @ccall liboneapi_support.onemklDsparse_trmv(device_queue::syclQueue_t, uplo_val::onemklUplo, opA::onemklTranspose, diag_val::onemklDiag, alpha::Ref{Cdouble}, A::matrix_handle_t, x::ZePtr{Cdouble}, beta::Ref{Cdouble}, y::ZePtr{Cdouble})::Cint
end

function onemklCsparse_trmv(device_queue, uplo_val, opA, diag_val, alpha, A, x, beta, y)
    @ccall liboneapi_support.onemklCsparse_trmv(device_queue::syclQueue_t, uplo_val::onemklUplo, opA::onemklTranspose, diag_val::onemklDiag, alpha::Ref{ComplexF32}, A::matrix_handle_t, x::ZePtr{ComplexF32}, beta::Ref{ComplexF32}, y::ZePtr{ComplexF32})::Cint
end

function onemklZsparse_trmv(device_queue, uplo_val, opA, diag_val, alpha, A, x, beta, y)
    @ccall liboneapi_support.onemklZsparse_trmv(device_queue::syclQueue_t, uplo_val::onemklUplo, opA::onemklTranspose, diag_val::onemklDiag, alpha::Ref{ComplexF64}, A::matrix_handle_t, x::ZePtr{ComplexF64}, beta::Ref{ComplexF64}, y::ZePtr{ComplexF64})::Cint
end

# sparse triangular solve, one wrapper per element type
function onemklSsparse_trsv(device_queue, uplo_val, opA, diag_val, alpha, A, x, y)
    @ccall liboneapi_support.onemklSsparse_trsv(device_queue::syclQueue_t, uplo_val::onemklUplo, opA::onemklTranspose, diag_val::onemklDiag, alpha::Ref{Cfloat}, A::matrix_handle_t, x::ZePtr{Cfloat}, y::ZePtr{Cfloat})::Cint
end

function onemklDsparse_trsv(device_queue, uplo_val, opA, diag_val, alpha, A, x, y)
    @ccall liboneapi_support.onemklDsparse_trsv(device_queue::syclQueue_t, uplo_val::onemklUplo, opA::onemklTranspose, diag_val::onemklDiag, alpha::Ref{Cdouble}, A::matrix_handle_t, x::ZePtr{Cdouble}, y::ZePtr{Cdouble})::Cint
end

function onemklCsparse_trsv(device_queue, uplo_val, opA, diag_val, alpha, A, x, y)
    @ccall liboneapi_support.onemklCsparse_trsv(device_queue::syclQueue_t, uplo_val::onemklUplo, opA::onemklTranspose, diag_val::onemklDiag, alpha::Ref{ComplexF32}, A::matrix_handle_t, x::ZePtr{ComplexF32}, y::ZePtr{ComplexF32})::Cint
end

function onemklZsparse_trsv(device_queue, uplo_val, opA, diag_val, alpha, A, x, y)
    @ccall liboneapi_support.onemklZsparse_trsv(device_queue::syclQueue_t, uplo_val::onemklUplo, opA::onemklTranspose, diag_val::onemklDiag, alpha::Ref{ComplexF64}, A::matrix_handle_t, x::ZePtr{ComplexF64}, y::ZePtr{ComplexF64})::Cint
end

# sparse-times-dense matrix product (dense X of `columns` columns, leading dims ldx/ldy)
function onemklSsparse_gemm(device_queue, layout_val, opA, opX, alpha, A, X, columns, ldx, beta, Y, ldy)
    @ccall liboneapi_support.onemklSsparse_gemm(device_queue::syclQueue_t, layout_val::onemklLayout, opA::onemklTranspose, opX::onemklTranspose, alpha::Ref{Cfloat}, A::matrix_handle_t, X::ZePtr{Cfloat}, columns::Int64, ldx::Int64, beta::Ref{Cfloat}, Y::ZePtr{Cfloat}, ldy::Int64)::Cint
end

function onemklDsparse_gemm(device_queue, layout_val, opA, opX, alpha, A, X, columns, ldx, beta, Y, ldy)
    @ccall liboneapi_support.onemklDsparse_gemm(device_queue::syclQueue_t, layout_val::onemklLayout, opA::onemklTranspose, opX::onemklTranspose, alpha::Ref{Cdouble}, A::matrix_handle_t, X::ZePtr{Cdouble}, columns::Int64, ldx::Int64, beta::Ref{Cdouble}, Y::ZePtr{Cdouble}, ldy::Int64)::Cint
end

function onemklCsparse_gemm(device_queue, layout_val, opA, opX, alpha, A, X, columns, ldx, beta, Y, ldy)
    @ccall liboneapi_support.onemklCsparse_gemm(device_queue::syclQueue_t, layout_val::onemklLayout, opA::onemklTranspose, opX::onemklTranspose, alpha::Ref{ComplexF32}, A::matrix_handle_t, X::ZePtr{ComplexF32}, columns::Int64, ldx::Int64, beta::Ref{ComplexF32}, Y::ZePtr{ComplexF32}, ldy::Int64)::Cint
end

function onemklZsparse_gemm(device_queue, layout_val, opA, opX, alpha, A, X, columns, ldx, beta, Y, ldy)
    @ccall liboneapi_support.onemklZsparse_gemm(device_queue::syclQueue_t, layout_val::onemklLayout, opA::onemklTranspose, opX::onemklTranspose, alpha::Ref{ComplexF64}, A::matrix_handle_t, X::ZePtr{ComplexF64}, columns::Int64, ldx::Int64, beta::Ref{ComplexF64}, Y::ZePtr{ComplexF64}, ldy::Int64)::Cint
end

# sparse triangular solve with multiple dense right-hand sides
function onemklSsparse_trsm(device_queue, layout_val, opA, opX, uplo_val, diag_val, alpha, A, X, columns, ldx, Y, ldy)
    @ccall liboneapi_support.onemklSsparse_trsm(device_queue::syclQueue_t, layout_val::onemklLayout, opA::onemklTranspose, opX::onemklTranspose, uplo_val::onemklUplo, diag_val::onemklDiag, alpha::Ref{Cfloat}, A::matrix_handle_t, X::ZePtr{Cfloat}, columns::Int64, ldx::Int64, Y::ZePtr{Cfloat}, ldy::Int64)::Cint
end

function onemklDsparse_trsm(device_queue, layout_val, opA, opX, uplo_val, diag_val, alpha, A, X, columns, ldx, Y, ldy)
    @ccall liboneapi_support.onemklDsparse_trsm(device_queue::syclQueue_t, layout_val::onemklLayout, opA::onemklTranspose, opX::onemklTranspose, uplo_val::onemklUplo, diag_val::onemklDiag, alpha::Ref{Cdouble}, A::matrix_handle_t, X::ZePtr{Cdouble}, columns::Int64, ldx::Int64, Y::ZePtr{Cdouble}, ldy::Int64)::Cint
end

function onemklCsparse_trsm(device_queue, layout_val, opA, opX, uplo_val, diag_val, alpha, A, X, columns, ldx, Y, ldy)
    @ccall liboneapi_support.onemklCsparse_trsm(device_queue::syclQueue_t, layout_val::onemklLayout, opA::onemklTranspose, opX::onemklTranspose, uplo_val::onemklUplo, diag_val::onemklDiag, alpha::Ref{ComplexF32}, A::matrix_handle_t, X::ZePtr{ComplexF32}, columns::Int64, ldx::Int64, Y::ZePtr{ComplexF32}, ldy::Int64)::Cint
end

function onemklZsparse_trsm(device_queue, layout_val, opA, opX, uplo_val, diag_val, alpha, A, X, columns, ldx, Y, ldy)
    @ccall liboneapi_support.onemklZsparse_trsm(device_queue::syclQueue_t, layout_val::onemklLayout, opA::onemklTranspose, opX::onemklTranspose, uplo_val::onemklUplo, diag_val::onemklDiag, alpha::Ref{ComplexF64}, A::matrix_handle_t, X::ZePtr{ComplexF64}, columns::Int64, ldx::Int64, Y::ZePtr{ComplexF64}, ldy::Int64)::Cint
end

# configure a matmat descriptor (views and transposes of A, B, C)
function onemklXsparse_set_matmat_data(descr, viewA, opA, viewB, opB, viewC)
    @ccall liboneapi_support.onemklXsparse_set_matmat_data(descr::matmat_descr_t, viewA::onemklMatrixView, opA::onemklTranspose, viewB::onemklMatrixView, opB::onemklTranspose, viewC::onemklMatrixView)::Cint
end

# sparse-sparse product with a dense output matrix C (c_nrows x c_ncols, leading dim ldc)
function onemklSsparse_matmatd(device_queue, c_layout, opA, opB, alpha, A, B, beta, C, c_nrows, c_ncols, ldc)
    @ccall liboneapi_support.onemklSsparse_matmatd(device_queue::syclQueue_t, c_layout::onemklLayout, opA::onemklTranspose, opB::onemklTranspose, alpha::Ref{Cfloat}, A::matrix_handle_t, B::matrix_handle_t, beta::Ref{Cfloat}, C::Ptr{Cfloat}, c_nrows::Int64, c_ncols::Int64, ldc::Int64)::Cint
end

function onemklDsparse_matmatd(device_queue, c_layout, opA, opB, alpha, A, B, beta, C, c_nrows, c_ncols, ldc)
    @ccall liboneapi_support.onemklDsparse_matmatd(device_queue::syclQueue_t, c_layout::onemklLayout, opA::onemklTranspose, opB::onemklTranspose, alpha::Ref{Cdouble}, A::matrix_handle_t, B::matrix_handle_t, beta::Ref{Cdouble}, C::Ptr{Cdouble}, c_nrows::Int64, c_ncols::Int64, ldc::Int64)::Cint
end

function
# -- Generated @ccall wrappers: complex sparse matmatd, matmat, teardown + DFT enums --
# NOTE(review): this span begins mid-definition (`function` keyword on the previous line)
# and ends inside the onemklDftConfigParam enum (its `end` follows on the next line).
onemklCsparse_matmatd(device_queue, c_layout, opA, opB, alpha, A, B, beta, C, c_nrows, c_ncols, ldc)
    # sparse-sparse product with a dense ComplexF32 output matrix C
    @ccall liboneapi_support.onemklCsparse_matmatd(device_queue::syclQueue_t, c_layout::onemklLayout, opA::onemklTranspose, opB::onemklTranspose, alpha::Ref{ComplexF32}, A::matrix_handle_t, B::matrix_handle_t, beta::Ref{ComplexF32}, C::Ptr{ComplexF32}, c_nrows::Int64, c_ncols::Int64, ldc::Int64)::Cint
end

function onemklZsparse_matmatd(device_queue, c_layout, opA, opB, alpha, A, B, beta, C, c_nrows, c_ncols, ldc)
    # sparse-sparse product with a dense ComplexF64 output matrix C.
    # BUGFIX: the dense output matrix was declared `C::Ptr{ComplexF32}`; for this Z
    # (double-precision complex) variant it must be `Ptr{ComplexF64}`, consistent with
    # the `alpha`/`beta` arguments of this wrapper and with the S/D/C variants, which
    # all use their own element type for C.
    @ccall liboneapi_support.onemklZsparse_matmatd(device_queue::syclQueue_t, c_layout::onemklLayout, opA::onemklTranspose, opB::onemklTranspose, alpha::Ref{ComplexF64}, A::matrix_handle_t, B::matrix_handle_t, beta::Ref{ComplexF64}, C::Ptr{ComplexF64}, c_nrows::Int64, c_ncols::Int64, ldc::Int64)::Cint
end

function onemklXsparse_matmat(device_queue, A, B, C, req, descr, sizeTempBuffer, tempBuffer)
    # staged sparse-sparse product; `req` selects the work-estimation/compute/finalize stage
    @ccall liboneapi_support.onemklXsparse_matmat(device_queue::syclQueue_t, A::matrix_handle_t, B::matrix_handle_t, C::matrix_handle_t, req::onemklMatmatRequest, descr::matmat_descr_t, sizeTempBuffer::Ptr{Int64}, tempBuffer::ZePtr{Cvoid})::Cint
end

function onemklDestroy()
    # global oneMKL teardown in the support library
    @ccall liboneapi_support.onemklDestroy()::Cint
end

@cenum onemklDftPrecision::UInt32 begin
    ONEMKL_DFT_PRECISION_SINGLE = 0
    ONEMKL_DFT_PRECISION_DOUBLE = 1
end

@cenum onemklDftDomain::UInt32 begin
    ONEMKL_DFT_DOMAIN_REAL = 0
    ONEMKL_DFT_DOMAIN_COMPLEX = 1
end

@cenum onemklDftConfigParam::UInt32 begin
    ONEMKL_DFT_PARAM_FORWARD_DOMAIN = 0
    ONEMKL_DFT_PARAM_DIMENSION = 1
    ONEMKL_DFT_PARAM_LENGTHS = 2
    ONEMKL_DFT_PARAM_PRECISION = 3
    ONEMKL_DFT_PARAM_FORWARD_SCALE = 4
    ONEMKL_DFT_PARAM_BACKWARD_SCALE = 5
    ONEMKL_DFT_PARAM_NUMBER_OF_TRANSFORMS = 6
    ONEMKL_DFT_PARAM_COMPLEX_STORAGE = 7
    ONEMKL_DFT_PARAM_PLACEMENT = 8
    ONEMKL_DFT_PARAM_INPUT_STRIDES = 9
    ONEMKL_DFT_PARAM_OUTPUT_STRIDES = 10
    ONEMKL_DFT_PARAM_FWD_DISTANCE = 11
    ONEMKL_DFT_PARAM_BWD_DISTANCE = 12
    ONEMKL_DFT_PARAM_WORKSPACE = 13
    # (continuation of the onemklDftConfigParam enum opened on the previous line)
    ONEMKL_DFT_PARAM_WORKSPACE_ESTIMATE_BYTES = 14
    ONEMKL_DFT_PARAM_WORKSPACE_BYTES = 15
    ONEMKL_DFT_PARAM_FWD_STRIDES = 16
    ONEMKL_DFT_PARAM_BWD_STRIDES = 17
    ONEMKL_DFT_PARAM_WORKSPACE_PLACEMENT = 18
    ONEMKL_DFT_PARAM_WORKSPACE_EXTERNAL_BYTES = 19
end

@cenum onemklDftConfigValue::UInt32 begin
    ONEMKL_DFT_VALUE_COMMITTED = 0
    ONEMKL_DFT_VALUE_UNCOMMITTED = 1
    ONEMKL_DFT_VALUE_COMPLEX_COMPLEX = 2
    ONEMKL_DFT_VALUE_REAL_REAL = 3
    ONEMKL_DFT_VALUE_INPLACE = 4
    ONEMKL_DFT_VALUE_NOT_INPLACE = 5
    ONEMKL_DFT_VALUE_WORKSPACE_AUTOMATIC = 6
    ONEMKL_DFT_VALUE_ALLOW = 7
    ONEMKL_DFT_VALUE_AVOID = 8
    ONEMKL_DFT_VALUE_WORKSPACE_INTERNAL = 9
    ONEMKL_DFT_VALUE_WORKSPACE_EXTERNAL = 10
end

# opaque handle to a oneMKL DFT descriptor managed by the support library
mutable struct onemklDftDescriptor_st end

const onemklDftDescriptor_t = Ptr{onemklDftDescriptor_st}

# create a 1-D DFT descriptor of the given precision/domain and transform length
function onemklDftCreate1D(desc, precision, domain, length)
    @ccall liboneapi_support.onemklDftCreate1D(desc::Ptr{onemklDftDescriptor_t}, precision::onemklDftPrecision, domain::onemklDftDomain, length::Int64)::Cint
end

# create an N-D DFT descriptor; `lengths` points to `dim` transform lengths
function onemklDftCreateND(desc, precision, domain, dim, lengths)
    @ccall liboneapi_support.onemklDftCreateND(desc::Ptr{onemklDftDescriptor_t}, precision::onemklDftPrecision, domain::onemklDftDomain, dim::Int64, lengths::Ptr{Int64})::Cint
end

function onemklDftDestroy(desc)
    @ccall liboneapi_support.onemklDftDestroy(desc::onemklDftDescriptor_t)::Cint
end

# commit the descriptor's configuration against a SYCL queue before compute calls
function onemklDftCommit(desc, queue)
    @ccall liboneapi_support.onemklDftCommit(desc::onemklDftDescriptor_t, queue::syclQueue_t)::Cint
end

# typed setters/getters for descriptor configuration parameters
function onemklDftSetValueInt64(desc, param, value)
    @ccall liboneapi_support.onemklDftSetValueInt64(desc::onemklDftDescriptor_t, param::onemklDftConfigParam, value::Int64)::Cint
end

function onemklDftSetValueDouble(desc, param, value)
    @ccall liboneapi_support.onemklDftSetValueDouble(desc::onemklDftDescriptor_t, param::onemklDftConfigParam, value::Cdouble)::Cint
end

function onemklDftSetValueInt64Array(desc, param, values, n)
    @ccall liboneapi_support.onemklDftSetValueInt64Array(desc::onemklDftDescriptor_t,
                                                        param::onemklDftConfigParam, values::Ptr{Int64}, n::Int64)::Cint
end

function onemklDftSetValueConfigValue(desc, param, value)
    @ccall liboneapi_support.onemklDftSetValueConfigValue(desc::onemklDftDescriptor_t, param::onemklDftConfigParam, value::onemklDftConfigValue)::Cint
end

function onemklDftGetValueInt64(desc, param, value)
    @ccall liboneapi_support.onemklDftGetValueInt64(desc::onemklDftDescriptor_t, param::onemklDftConfigParam, value::Ptr{Int64})::Cint
end

function onemklDftGetValueDouble(desc, param, value)
    @ccall liboneapi_support.onemklDftGetValueDouble(desc::onemklDftDescriptor_t, param::onemklDftConfigParam, value::Ptr{Cdouble})::Cint
end

function onemklDftGetValueInt64Array(desc, param, values, n)
    @ccall liboneapi_support.onemklDftGetValueInt64Array(desc::onemklDftDescriptor_t, param::onemklDftConfigParam, values::Ptr{Int64}, n::Ptr{Int64})::Cint
end

function onemklDftGetValueConfigValue(desc, param, value)
    @ccall liboneapi_support.onemklDftGetValueConfigValue(desc::onemklDftDescriptor_t, param::onemklDftConfigParam, value::Ptr{onemklDftConfigValue})::Cint
end

# in-place and out-of-place forward/backward transforms (USM pointer variants)
function onemklDftComputeForward(desc, inout)
    @ccall liboneapi_support.onemklDftComputeForward(desc::onemklDftDescriptor_t, inout::Ptr{Cvoid})::Cint
end

function onemklDftComputeForwardOutOfPlace(desc, in, out)
    @ccall liboneapi_support.onemklDftComputeForwardOutOfPlace(desc::onemklDftDescriptor_t, in::Ptr{Cvoid}, out::Ptr{Cvoid})::Cint
end

function onemklDftComputeBackward(desc, inout)
    @ccall liboneapi_support.onemklDftComputeBackward(desc::onemklDftDescriptor_t, inout::Ptr{Cvoid})::Cint
end

function onemklDftComputeBackwardOutOfPlace(desc, in, out)
    @ccall liboneapi_support.onemklDftComputeBackwardOutOfPlace(desc::onemklDftDescriptor_t, in::Ptr{Cvoid}, out::Ptr{Cvoid})::Cint
end

# buffer-based compute variants
function onemklDftComputeForwardBuffer(desc, inout)
    @ccall liboneapi_support.onemklDftComputeForwardBuffer(desc::onemklDftDescriptor_t, inout::Ptr{Cvoid})::Cint
end

function onemklDftComputeForwardOutOfPlaceBuffer(desc, in,
out)
    # (continuation of the signature started on the previous line)
    @ccall liboneapi_support.onemklDftComputeForwardOutOfPlaceBuffer(desc::onemklDftDescriptor_t, in::Ptr{Cvoid}, out::Ptr{Cvoid})::Cint
end

function onemklDftComputeBackwardBuffer(desc, inout)
    @ccall liboneapi_support.onemklDftComputeBackwardBuffer(desc::onemklDftDescriptor_t, inout::Ptr{Cvoid})::Cint
end

function onemklDftComputeBackwardOutOfPlaceBuffer(desc, in, out)
    @ccall liboneapi_support.onemklDftComputeBackwardOutOfPlaceBuffer(desc::onemklDftDescriptor_t, in::Ptr{Cvoid}, out::Ptr{Cvoid})::Cint
end

function onemklDftQueryParamIndices(out, n)
    @ccall liboneapi_support.onemklDftQueryParamIndices(out::Ptr{Int64}, n::Int64)::Cint
end

# status codes returned by the DFT support-library entry points above
const ONEMKL_DFT_STATUS_SUCCESS = 0
const ONEMKL_DFT_STATUS_ERROR = -1
const ONEMKL_DFT_STATUS_INVALID_ARGUMENT = -2
const ONEMKL_DFT_STATUS_BAD_STATE = -3

================================================ FILE: lib/sycl/SYCL.jl ================================================

module SYCL

using ..oneAPI
using ..oneL0
using ..Support

export syclPlatform, syclDevice, syclContext, syclQueue, syclEvent

# Wrapper around a syclPlatform_t handle created from a Level Zero driver.
# The handle is released by a finalizer.
mutable struct syclPlatform
    handle::syclPlatform_t

    function syclPlatform(drv::ZeDriver)
        handle = Ref{syclPlatform_t}()
        syclPlatformCreate(handle, drv)
        obj = new(handle[])
        finalizer(obj) do sycl_platform
            syclPlatformDestroy(sycl_platform)
        end
    end
end
Base.unsafe_convert(::Type{syclPlatform_t}, sycl_platform::syclPlatform) =
    sycl_platform.handle

# Wrapper around a syclDevice_t handle; keeps the underlying ZeDevice alive.
mutable struct syclDevice
    handle::syclDevice_t
    ze_dev::ZeDevice

    function syclDevice(platform::syclPlatform, ze_dev::ZeDevice)
        handle = Ref{syclDevice_t}()
        syclDeviceCreate(handle, platform, ze_dev)
        obj = new(handle[], ze_dev)
        finalizer(obj) do dev
            syclDeviceDestroy(dev)
        end
    end
end
Base.unsafe_convert(::Type{syclDevice_t}, dev::syclDevice) = dev.handle

# Wrapper around a syclContext_t handle created from devices and a ZeContext.
mutable struct syclContext
    handle::syclContext_t
    devs::Vector{syclDevice}
    ze_ctx::ZeContext

    function syclContext(devs::Vector{syclDevice}, ze_ctx::ZeContext)
        handle = Ref{syclContext_t}()
        syclContextCreate(handle, devs, length(devs), ze_ctx, true)
        obj
= new(handle[], devs, ze_ctx)
        finalizer(obj) do ctx
            # tear down oneMKL state before the context it was created against goes away
            onemklDestroy()
            syclContextDestroy(ctx)
        end
    end
end
Base.unsafe_convert(::Type{syclContext_t}, ctx::syclContext) = ctx.handle

# Wrapper around a syclQueue_t handle backed by a Level Zero command queue.
mutable struct syclQueue
    handle::syclQueue_t
    ctx::syclContext
    dev::syclDevice
    ze_queue::ZeCommandQueue

    function syclQueue(ctx::syclContext, dev::syclDevice, ze_queue::ZeCommandQueue)
        handle = Ref{syclQueue_t}()
        syclQueueCreate(handle, ctx, dev, ze_queue, true)
        obj = new(handle[], ctx, dev, ze_queue)
        finalizer(obj) do queue
            syclQueueDestroy(queue)
        end
    end
end
Base.unsafe_convert(::Type{syclQueue_t}, queue::syclQueue) = queue.handle

# Wrapper around a syclEvent_t handle backed by a Level Zero event.
mutable struct syclEvent
    handle::syclEvent_t
    ctx::syclContext
    ze_event::ZeEvent

    function syclEvent(ctx::syclContext, ze_event::ZeEvent)
        handle = Ref{syclEvent_t}()
        syclEventCreate(handle, ctx, ze_event, true)
        obj = new(handle[], ctx, ze_event)
        finalizer(obj) do event
            syclEventDestroy(event)
        end
    end
end
Base.unsafe_convert(::Type{syclEvent_t}, event::syclEvent) = event.handle

end

================================================ FILE: lib/utils/APIUtils.jl ================================================

module APIUtils

# helpers that facilitate working with C APIs
using GPUToolbox: @checked, @debug_ccall
export @checked, @debug_ccall

include("enum.jl")

end

================================================ FILE: lib/utils/enum.jl ================================================

export @enum_without_prefix

## redeclare enum values without a prefix

# this is useful when enum values from an underlying C library, typically prefixed for the
# lack of namespacing in C, are to be used in Julia where we do have module namespacing.
# Redefine every instance of `enum` as a `const` binding with `prefix` stripped,
# e.g. `@enum_without_prefix ze_result_t ZE_` makes `RESULT_SUCCESS` alias
# `ZE_RESULT_SUCCESS`. `enum` may be a plain symbol or a `Module.name` reference.
macro enum_without_prefix(enum, prefix)
    if isa(enum, Symbol)
        mod = __module__
    elseif Meta.isexpr(enum, :(.))
        # qualified reference: resolve the module, then the enum name inside it
        mod = getfield(__module__, enum.args[1])
        enum = enum.args[2].value
    else
        error("Do not know how to refer to $enum")
    end
    enum = getfield(mod, enum)
    prefix = String(prefix)

    ex = quote end
    for instance in instances(enum)
        name = String(Symbol(instance))
        @assert startswith(name, prefix)
        push!(ex.args, :(const $(Symbol(name[length(prefix)+1:end])) = $(mod).$(Symbol(name))))
    end

    return esc(ex)
end

================================================ FILE: res/Project.toml ================================================

[deps]
Clang = "40e3b903-d033-50b4-a0cc-940c62c95e31"
JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899"
oneAPI_Level_Zero_Headers_jll = "f4bc562b-d309-54f8-9efb-476e56f0410d"

================================================ FILE: res/libze_prologue.jl ================================================

# outlined functionality to avoid GC frame allocation
@noinline function throw_api_error(res)
    # map out-of-memory results onto the GPUArrays OOM error type so callers can
    # distinguish them; everything else becomes a ZeError
    if res == RESULT_ERROR_OUT_OF_HOST_MEMORY || res == RESULT_ERROR_OUT_OF_DEVICE_MEMORY
        throw(OutOfGPUMemoryError())
    else
        throw(ZeError(res))
    end
end

# Run `f`, retrying after reclaiming memory when it reports an OOM result,
# and throw on any non-success result.
function check(f)
    res = retry_reclaim(err -> err == RESULT_ERROR_OUT_OF_HOST_MEMORY ||
                               err == RESULT_ERROR_OUT_OF_DEVICE_MEMORY) do
        f()
    end

    if res != RESULT_SUCCESS
        throw_api_error(res)
    end

    return
end

================================================ FILE: res/local.jl ================================================

## generate preferences for loading a local copy of the oneAPI toolchain

#
# discovery
#

import Libdl

# Look up `lib` (optionally in extra `locations`) and record its resolved path
# in the `output` dict, printing what was (not) found.
function scan_library!(output, lib, locations=String[])
    name = Libdl.find_library(lib, locations)
    if name != ""
        path = Libdl.dlopen(name) do handle
            Libdl.dlpath(handle)
        end
        println("- found $lib at $path")
        output[lib] = path
    else
        println("- did not find $lib")
    end
end

# NOTE: some JLLs also provide binaries (e.g.
ocloc, iga64, etc),
# but we don't scan for them if our toolchain does not use them

# scan for the Intel Graphics Compiler libraries
igc = Dict()
println("Trying to find local IGC...")
for lib = ["libigc", "libiga64", "libigdfcl", "libopencl-clang"]
    scan_library!(igc, lib)
end

gmmlib = Dict()
println("\nTrying to find local gmmlib...")
scan_library!(gmmlib, "libigdgmm")

neo = Dict()
println("\nTrying to find local NEO...")
## version suffixed
scan_library!(neo, "libze_intel_gpu.so.1")
## in intel-opencl subdirectory
locations = String[]
if haskey(igc, "libigc")
    push!(locations, joinpath(dirname(igc["libigc"]), "intel-opencl"))
end
scan_library!(neo, "libigdrcl", locations)

loader = Dict()
println("\nTrying to find local oneAPI loader...")
scan_library!(loader, "libze_loader")
scan_library!(loader, "libze_validation_layer")


#
# setting preferences
#

println("\nWriting preferences:\n")

using Pkg

# use a temporary environment to install packages we need
Pkg.activate(; temp=true)
Pkg.add(["Preferences", "NEO_jll", "oneAPI_Level_Zero_Loader_jll"])
using Preferences
using NEO_jll, oneAPI_Level_Zero_Loader_jll

# activate the global environment, where we'll set the preferences
Pkg.activate()

# work around Preferences.jl#34
if !isfile(Base.active_project())
    mkpath(dirname(Base.active_project()))
    touch(Base.active_project())
end

# Write each discovered `lib => path` pair as a `<binding>_path` preference on `mod`.
function set_preferences(mod, entries)
    for (lib, path) in entries
        binding = replace(split(lib, '.')[1], "-" => "_")
        if binding == "libiga64"
            binding = "libiga"   # sigh
        end
        set_preferences!(mod, binding * "_path" => path)
    end
end

set_preferences(NEO_jll, neo)
set_preferences(NEO_jll.libigc_jll, igc)
set_preferences(NEO_jll.gmmlib_jll, gmmlib)
set_preferences(oneAPI_Level_Zero_Loader_jll, loader)

# BUGFIX: the user-facing message misspelled "Preferences" as "Prefences".
println("""
    Preferences have been written to `$(joinpath(dirname(Base.active_project()), "LocalPreferences.toml"))`.
Please modify the file to your liking, and remove the oneAPI-related preferences (or the entire file) to revert to the original binaries.""") ================================================ FILE: res/support.toml ================================================ [general] library_name = "liboneapi_support" output_file_path = "../lib/support/liboneapi_support.jl" [codegen] use_ccall_macro = true [api.onemklXgemm_batch.argtypes] 4 = "ZePtr{Int64}" 5 = "ZePtr{Int64}" 6 = "ZePtr{Int64}" 7 = "ZePtr{T}" 8 = "ZePtr{Ptr{T}}" 9 = "ZePtr{Int64}" 10 = "ZePtr{Ptr{T}}" 11 = "ZePtr{Int64}" 12 = "ZePtr{T}" 13 = "ZePtr{Ptr{T}}" 14 = "ZePtr{Int64}" 16 = "ZePtr{Int64}" [api.onemklXtrsm_batch.argtypes] 6 = "ZePtr{Int64}" 7 = "ZePtr{Int64}" 8 = "ZePtr{T}" 9 = "ZePtr{Ptr{T}}" 10 = "ZePtr{Int64}" 11 = "ZePtr{Ptr{T}}" 12 = "ZePtr{Int64}" 14 = "ZePtr{Int64}" [api.onemklXgemm_batch_strided.argtypes] 7 = "Ref{T}" 8 = "ZePtr{T}" 11 = "ZePtr{T}" 14 = "Ref{T}" 15 = "ZePtr{T}" [api.onemklXgemm.argtypes] 7 = "Ref{T}" 8 = "ZePtr{T}" 10 = "ZePtr{T}" 12 = "Ref{T}" 13 = "ZePtr{T}" [api.onemklXsymm.argtypes] 6 = "Ref{T}" 7 = "ZePtr{T}" 9 = "ZePtr{T}" 11 = "Ref{T}" 12 = "ZePtr{T}" [api.onemklXsyrk.argtypes] 6 = "Ref{T}" 7 = "ZePtr{T}" 9 = "Ref{T}" 10 = "ZePtr{T}" [api.onemklXsyr2k.argtypes] 6 = "Ref{T}" 7 = "ZePtr{T}" 9 = "ZePtr{T}" 11 = "Ref{T}" 12 = "ZePtr{T}" [api.onemklXtrmm.argtypes] 8 = "Ref{T}" 9 = "ZePtr{T}" 11 = "ZePtr{T}" [api.onemklXtrmm_variant.argtypes] 8 = "Ref{T}" 9 = "ZePtr{T}" 11 = "ZePtr{T}" 13 = "Ref{T}" 14 = "ZePtr{T}" [api.onemklXtrsm.argtypes] 8 = "Ref{T}" 9 = "ZePtr{T}" 11 = "ZePtr{T}" [api.onemklXtrsm_variant.argtypes] 8 = "Ref{T}" 9 = "ZePtr{T}" 11 = "ZePtr{T}" 13 = "Ref{T}" 14 = "ZePtr{T}" [api.onemklXhemm.argtypes] 6 = "Ref{T}" 7 = "ZePtr{T}" 9 = "ZePtr{T}" 11 = "Ref{T}" 12 = "ZePtr{T}" [api.onemklXherk.argtypes] 6 = "Ref{T}" 7 = "ZePtr{T}" 9 = "Ref{T}" 10 = "ZePtr{T}" [api.onemklXher2k.argtypes] 6 = "Ref{T}" 7 = "ZePtr{T}" 9 = "ZePtr{T}" 11 = "Ref{T}" 12 = "ZePtr{T}" 
[api.onemklXgbmv.argtypes] 7 = "Ref{T}" 8 = "ZePtr{T}" 10 = "ZePtr{T}" 12 = "Ref{T}" 13 = "ZePtr{T}" [api.onemklXgemv.argtypes] 5 = "Ref{T}" 6 = "ZePtr{T}" 8 = "ZePtr{T}" 10 = "Ref{T}" 11 = "ZePtr{T}" [api.onemklXdot.argtypes] 3 = "ZePtr{T}" 5 = "ZePtr{T}" 7 = "RefOrZeRef{T}" [api.onemklXdotc.argtypes] 3 = "ZePtr{T}" 5 = "ZePtr{T}" 7 = "RefOrZeRef{T}" [api.onemklXdotu.argtypes] 3 = "ZePtr{T}" 5 = "ZePtr{T}" 7 = "RefOrZeRef{T}" [api.onemklXasum.argtypes] 3 = "ZePtr{T}" 5 = "ZePtr{T}" [api.onemklCasum.argtypes] 3 = "ZePtr{ComplexF32}" 5 = "ZePtr{Cfloat}" [api.onemklZasum.argtypes] 3 = "ZePtr{ComplexF64}" 5 = "ZePtr{Float64}" [api.onemklXaxpy.argtypes] 3 = "Ref{T}" 4 = "ZePtr{T}" 6 = "ZePtr{T}" [api.onemklXaxpby.argtypes] 3 = "Ref{T}" 4 = "ZePtr{T}" 6 = "Ref{T}" 7 = "ZePtr{T}" [api.onemklXrot.argtypes] 3 = "ZePtr{T}" 5 = "ZePtr{T}" 7 = "Ref{T}" 8 = "Ref{T}" [api.onemklCrot.argtypes] 3 = "ZePtr{ComplexF32}" 5 = "ZePtr{ComplexF32}" 7 = "Ref{Float32}" 8 = "Ref{ComplexF32}" [api.onemklZrot.argtypes] 3 = "ZePtr{ComplexF64}" 5 = "ZePtr{ComplexF64}" 7 = "Ref{Float64}" 8 = "Ref{ComplexF64}" [api.onemklCSrot.argtypes] 3 = "ZePtr{ComplexF32}" 5 = "ZePtr{ComplexF32}" 7 = "Ref{Float32}" 8 = "Ref{Float32}" [api.onemklZDrot.argtypes] 3 = "ZePtr{ComplexF64}" 5 = "ZePtr{ComplexF64}" 7 = "Ref{Float64}" 8 = "Ref{Float64}" [api.onemklXscal.argtypes] 3 = "Ref{T}" 4 = "ZePtr{T}" [api.onemklCSscal.argtypes] 3 = "Ref{Float32}" 4 = "ZePtr{ComplexF32}" [api.onemklZDscal.argtypes] 3 = "Ref{Float64}" 4 = "ZePtr{ComplexF64}" [api.onemklXger.argtypes] 4 = "Ref{T}" 5 = "ZePtr{T}" 7 = "ZePtr{T}" 9 = "ZePtr{T}" [api.onemklXgerc.argtypes] 4 = "Ref{T}" 5 = "ZePtr{T}" 7 = "ZePtr{T}" 9 = "ZePtr{T}" [api.onemklXhemv.argtypes] 4 = "Ref{T}" 5 = "ZePtr{T}" 7 = "ZePtr{T}" 9 = "Ref{T}" 10 = "ZePtr{T}" [api.onemklXhbmv.argtypes] 5 = "Ref{T}" 6 = "ZePtr{T}" 8 = "ZePtr{T}" 10 = "Ref{T}" 11 = "ZePtr{T}" [api.onemklXher.argtypes] 4 = "Ref{T}" 5 = "ZePtr{T}" 7 = "ZePtr{T}" [api.onemklXher2.argtypes] 4 = "Ref{T}" 5 
= "ZePtr{T}" 7 = "ZePtr{T}" 9 = "ZePtr{T}" [api.onemklXsbmv.argtypes] 5 = "Ref{T}" 6 = "ZePtr{T}" 8 = "ZePtr{T}" 10 = "Ref{T}" 11 = "ZePtr{T}" [api.onemklXsymv.argtypes] 4 = "Ref{T}" 5 = "ZePtr{T}" 7 = "ZePtr{T}" 9 = "Ref{T}" 10 = "ZePtr{T}" [api.onemklXsyr.argtypes] 4 = "Ref{T}" 5 = "ZePtr{T}" 7 = "ZePtr{T}" [api.onemklXtbmv.argtypes] 7 = "ZePtr{T}" 9 = "ZePtr{T}" [api.onemklXtrmv.argtypes] 6 = "ZePtr{T}" 8 = "ZePtr{T}" [api.onemklXtrsv.argtypes] 6 = "ZePtr{T}" 8 = "ZePtr{T}" [api.onemklXnrm2.argtypes] 3 = "ZePtr{T}" 5 = "RefOrZeRef{T}" [api.onemklCnrm2.argtypes] 3 = "ZePtr{ComplexF32}" 5 = "RefOrZeRef{Cfloat}" [api.onemklZnrm2.argtypes] 3 = "ZePtr{ComplexF64}" 5 = "RefOrZeRef{Cdouble}" [api.onemklXcopy.argtypes] 3 = "ZePtr{T}" 5 = "ZePtr{T}" [api.onemklXiamax.argtypes] 3 = "ZePtr{T}" 5 = "ZePtr{Int64}" [api.onemklXiamax_64.argtypes] 3 = "ZePtr{T}" 5 = "ZePtr{Int64}" [api.onemklXiamin.argtypes] 3 = "ZePtr{T}" 5 = "ZePtr{Int64}" [api.onemklXiamin_64.argtypes] 3 = "ZePtr{T}" 5 = "ZePtr{Int64}" [api.onemklXswap.argtypes] 3 = "ZePtr{T}" 5 = "ZePtr{T}" [api.onemklXdgmm.argtypes] 5 = "ZePtr{T}" 7 = "ZePtr{T}" 9 = "ZePtr{T}" [api.onemklXgeru.argtypes] 4 = "Ref{T}" 5 = "ZePtr{T}" 7 = "ZePtr{T}" 9 = "ZePtr{T}" [api.onemklXhpmv.argtypes] 4 = "Ref{T}" 5 = "ZePtr{T}" 6 = "ZePtr{T}" 8 = "Ref{T}" 9 = "ZePtr{T}" [api.onemklChpr.argtypes] 4 = "Ref{Float32}" 5 = "ZePtr{ComplexF32}" 7 = "ZePtr{ComplexF32}" [api.onemklZhpr.argtypes] 4 = "Ref{Float64}" 5 = "ZePtr{ComplexF64}" 7 = "ZePtr{ComplexF64}" [api.onemklXhpr2.argtypes] 4 = "Ref{T}" 5 = "ZePtr{T}" 7 = "ZePtr{T}" 9 = "ZePtr{T}" [api.onemklXsyr2.argtypes] 4 = "Ref{T}" 5 = "ZePtr{T}" 7 = "ZePtr{T}" 9 = "ZePtr{T}" [api.onemklXspmv.argtypes] 4 = "Ref{T}" 5 = "ZePtr{T}" 6 = "ZePtr{T}" 8 = "Ref{T}" 9 = "ZePtr{T}" [api.onemklXspr.argtypes] 4 = "Ref{T}" 5 = "ZePtr{T}" 7 = "ZePtr{T}" [api.onemklXspr2.argtypes] 4 = "Ref{T}" 5 = "ZePtr{T}" 7 = "ZePtr{T}" 9 = "ZePtr{T}" [api.onemklSsdsdot.argtypes] 3 = "Ref{Float32}" 4 = "ZePtr{Float32}" 6 
= "ZePtr{Float32}" 8 = "Ref{Float32}" [api.onemklXsparse_set_csr_data.argtypes] 6 = "ZePtr{Int32}" 7 = "ZePtr{Int32}" 8 = "ZePtr{T}" [api.onemklXsparse_set_csr_data_64.argtypes] 6 = "ZePtr{Int64}" 7 = "ZePtr{Int64}" 8 = "ZePtr{T}" [api.onemklXsparse_set_coo_data.argtypes] 7 = "ZePtr{Int32}" 8 = "ZePtr{Int32}" 9 = "ZePtr{T}" [api.onemklXsparse_set_coo_data_64.argtypes] 7 = "ZePtr{Int64}" 8 = "ZePtr{Int64}" 9 = "ZePtr{T}" [api.onemklXsparse_gemv.argtypes] 3 = "Ref{T}" 5 = "ZePtr{T}" 6 = "Ref{T}" 7 = "ZePtr{T}" [api.onemklXsparse_symv.argtypes] 3 = "Ref{T}" 5 = "ZePtr{T}" 6 = "Ref{T}" 7 = "ZePtr{T}" [api.onemklXsparse_trmv.argtypes] 5 = "Ref{T}" 7 = "ZePtr{T}" 8 = "Ref{T}" 9 = "ZePtr{T}" [api.onemklXsparse_trsv.argtypes] 5 = "Ref{T}" 7 = "ZePtr{T}" 8 = "ZePtr{T}" [api.onemklXsparse_update_diagonal_values.argtypes] 4 = "ZePtr{T}" [api.onemklXsparse_gemvdot.argtypes] 3 = "Ref{T}" 5 = "ZePtr{T}" 6 = "Ref{T}" 7 = "ZePtr{T}" 8 = "ZePtr{T}" [api.onemklXsparse_gemm.argtypes] 5 = "Ref{T}" 7 = "ZePtr{T}" 10 = "Ref{T}" 11 = "ZePtr{T}" [api.onemklXsparse_matmat.argtypes] 8 = "ZePtr{Cvoid}" [api.onemklXpotrf.argtypes] 4 = "ZePtr{T}" 6 = "ZePtr{T}" [api.onemklXpotrs.argtypes] 5 = "ZePtr{T}" 7 = "ZePtr{T}" 9 = "ZePtr{T}" [api.onemklXpotri.argtypes] 4 = "ZePtr{T}" 6 = "ZePtr{T}" [api.onemklXsytrf.argtypes] 4 = "ZePtr{T}" 6 = "ZePtr{Int64}" 7 = "ZePtr{T}" [api.onemklXgetrf.argtypes] 4 = "ZePtr{T}" 6 = "ZePtr{Int64}" 7 = "ZePtr{T}" [api.onemklXgetrs.argtypes] 5 = "ZePtr{T}" 7 = "ZePtr{Int64}" 8 = "ZePtr{T}" 10 = "ZePtr{T}" [api.onemklXgetri.argtypes] 3 = "ZePtr{T}" 5 = "ZePtr{Int64}" 6 = "ZePtr{T}" [api.onemklXgeqrf.argtypes] 4 = "ZePtr{T}" 6 = "ZePtr{T}" 7 = "ZePtr{T}" [api.onemklXormqr.argtypes] 7 = "ZePtr{T}" 9 = "ZePtr{T}" 10 = "ZePtr{T}" 12 = "ZePtr{T}" [api.onemklXunmqr.argtypes] 7 = "ZePtr{T}" 9 = "ZePtr{T}" 10 = "ZePtr{T}" 12 = "ZePtr{T}" [api.onemklXorgqr.argtypes] 5 = "ZePtr{T}" 7 = "ZePtr{T}" 8 = "ZePtr{T}" [api.onemklXungqr.argtypes] 5 = "ZePtr{T}" 7 = "ZePtr{T}" 8 = 
"ZePtr{T}" [api.onemklSgebrd.argtypes] 4 = "ZePtr{Float32}" 6 = "ZePtr{Float32}" 7 = "ZePtr{Float32}" 8 = "ZePtr{Float32}" 9 = "ZePtr{Float32}" 10 = "ZePtr{Float32}" [api.onemklDgebrd.argtypes] 4 = "ZePtr{Float64}" 6 = "ZePtr{Float64}" 7 = "ZePtr{Float64}" 8 = "ZePtr{Float64}" 9 = "ZePtr{Float64}" 10 = "ZePtr{Float64}" [api.onemklCgebrd.argtypes] 4 = "ZePtr{ComplexF32}" 6 = "ZePtr{Float32}" 7 = "ZePtr{ComplexF32}" 8 = "ZePtr{ComplexF32}" 9 = "ZePtr{ComplexF32}" 10 = "ZePtr{ComplexF32}" [api.onemklZgebrd.argtypes] 4 = "ZePtr{ComplexF64}" 6 = "ZePtr{Float64}" 7 = "ZePtr{ComplexF64}" 8 = "ZePtr{ComplexF64}" 9 = "ZePtr{ComplexF64}" 10 = "ZePtr{ComplexF64}" [api.onemklSgesvd.argtypes] 6 = "ZePtr{Float32}" 8 = "ZePtr{Float32}" 9 = "ZePtr{Float32}" 11 = "ZePtr{Float32}" 13 = "ZePtr{Float32}" [api.onemklDgesvd.argtypes] 6 = "ZePtr{Float64}" 8 = "ZePtr{Float64}" 9 = "ZePtr{Float64}" 11 = "ZePtr{Float64}" 13 = "ZePtr{Float64}" [api.onemklCgesvd.argtypes] 6 = "ZePtr{ComplexF32}" 8 = "ZePtr{Float32}" 9 = "ZePtr{ComplexF32}" 11 = "ZePtr{ComplexF32}" 13 = "ZePtr{ComplexF32}" [api.onemklZgesvd.argtypes] 6 = "ZePtr{ComplexF64}" 8 = "ZePtr{Float64}" 9 = "ZePtr{ComplexF64}" 11 = "ZePtr{ComplexF64}" 13 = "ZePtr{ComplexF64}" [api.onemklXtrtri.argtypes] 5 = "ZePtr{T}" 7 = "ZePtr{T}" [api.onemklXgesv.argtypes] 4 = "ZePtr{T}" 6 = "ZePtr{T}" 7 = "ZePtr{T}" 9 = "ZePtr{T}" [api.onemklXgetrf_batch.argtypes] 4 = "ZePtr{Ptr{T}}" 6 = "ZePtr{Ptr{Int64}}" 9 = "ZePtr{T}" [api.onemklXgetrs_batch.argtypes] 5 = "ZePtr{Ptr{T}}" 7 = "ZePtr{Ptr{Int64}}" 8 = "ZePtr{Ptr{T}}" 12 = "ZePtr{T}" [api.onemklXgetri_batch.argtypes] 3 = "ZePtr{Ptr{T}}" 5 = "ZePtr{Ptr{Int64}}" 8 = "ZePtr{T}" [api.onemklXgeqrf_batch.argtypes] 4 = "ZePtr{Ptr{T}}" 6 = "ZePtr{Ptr{T}}" 9 = "ZePtr{T}" [api.onemklXorgqr_batch.argtypes] 5 = "ZePtr{Ptr{T}}" 7 = "ZePtr{Ptr{T}}" 10 = "ZePtr{T}" [api.onemklXungqr_batch.argtypes] 5 = "ZePtr{Ptr{T}}" 7 = "ZePtr{Ptr{T}}" 10 = "ZePtr{T}" [api.onemklXpotrf_batch.argtypes] 4 = "ZePtr{Ptr{T}}" 8 = 
"ZePtr{T}"
[api.onemklXpotrs_batch.argtypes]
5 = "ZePtr{Ptr{T}}"
7 = "ZePtr{Ptr{T}}"
11 = "ZePtr{T}"
[api.onemklXsyevd.argtypes]
5 = "ZePtr{T}"
7 = "ZePtr{T}"
8 = "ZePtr{T}"
[api.onemklCheevd.argtypes]
5 = "ZePtr{ComplexF32}"
7 = "ZePtr{Float32}"
8 = "ZePtr{ComplexF32}"
[api.onemklZheevd.argtypes]
5 = "ZePtr{ComplexF64}"
7 = "ZePtr{Float64}"
8 = "ZePtr{ComplexF64}"
[api.onemklXsygvd.argtypes]
6 = "ZePtr{T}"
8 = "ZePtr{T}"
10 = "ZePtr{T}"
11 = "ZePtr{T}"
[api.onemklChegvd.argtypes]
6 = "ZePtr{ComplexF32}"
8 = "ZePtr{ComplexF32}"
10 = "ZePtr{Float32}"
11 = "ZePtr{ComplexF32}"
[api.onemklZhegvd.argtypes]
6 = "ZePtr{ComplexF64}"
8 = "ZePtr{ComplexF64}"
10 = "ZePtr{Float64}"
11 = "ZePtr{ComplexF64}"
[api.onemklXsparse_trsm.argtypes]
7 = "Ref{T}"
9 = "ZePtr{T}"
12 = "ZePtr{T}"
[api.onemklXsparse_matmatd.argtypes]
5 = "Ref{T}"
8 = "Ref{T}"

================================================ FILE: res/wrap.jl ================================================

# script to parse oneAPI headers and generate Julia wrappers

#
# Parsing
#

using Clang
using Clang.Generators

using JuliaFormatter

# Generate the Julia wrapper for `name` from its C `headers`, using the options
# in `res/<name>.toml`; when `dependents=false`, only wrap declarations from the
# listed headers themselves (not from transitively-included ones).
function wrap(name, headers...; defines=[], include_dirs=[], dependents=true)
    @info "Wrapping $name"

    args = get_default_args()
    for include_dir in include_dirs
        push!(args, "-isystem$include_dir")
    end

    options = load_options(joinpath(@__DIR__, "$(name).toml"))

    # create context
    ctx = create_context([headers...], args, options)

    # run generator
    build!(ctx, BUILDSTAGE_NO_PRINTING)

    # if requested, only wrap stuff from the list of headers
    # (i.e., not from included ones)
    if !dependents
        function rewrite!(dag::ExprDAG)
            replace!(get_nodes(dag)) do node
                path = normpath(Clang.get_filename(node.cursor))
                if !in(path, headers)
                    return ExprNode(node.id, Generators.Skip(), node.cursor, Expr[], node.adj)
                end
                return node
            end
        end
        rewrite!(ctx.dag)
    end

    rewriter!(ctx, options)

    build!(ctx, BUILDSTAGE_PRINTING_ONLY)

    format_file(options["general"]["output_file_path"], YASStyle())

    return
end

function rewriter!(ctx,
options)
    # Post-process the generated expression DAG: apply the per-function argument-type
    # overrides from the TOML options, and wrap checked-return functions in `@checked`.
    for node in get_nodes(ctx.dag)
        if Generators.is_function(node) && !Generators.is_variadic_function(node)
            expr = node.exprs[1]
            call_expr = expr.args[2].args[1].args[3]    # assumes `@ccall`

            target_expr = call_expr.args[1].args[1]
            fn = String(target_expr.args[2].value)

            # rewrite pointer argument types
            arg_exprs = call_expr.args[1].args[2:end]
            if haskey(options, "api") && haskey(options["api"], fn)
                argtypes = get(options["api"][fn], "argtypes", Dict())
                for (arg, typ) in argtypes
                    i = parse(Int, arg)
                    arg_exprs[i].args[2] = Meta.parse(typ)
                end
            elseif startswith(fn, "onemkl")
                # oneMKL contains many almost-identical functions, e.g., `onemkl[SDCZH]gemm`,
                # for which we only register a single `onemklXgemm` with `T` placeholders.
                generic_fn = "onemklX" * fn[8:end]
                if haskey(options["api"], generic_fn)
                    argtypes = get(options["api"][generic_fn], "argtypes", Dict())

                    # substitute the `T` placeholder based on the function's type code
                    typcode = fn[7]
                    T = typcode == 'S' ? "Cfloat" :
                        typcode == 'D' ? "Cdouble" :
                        typcode == 'C' ? "ComplexF32" :
                        typcode == 'H' ? "Float16" :
                        typcode == 'Z' ?
"ComplexF64" : error("unknown type code $typcode")

                    for (arg, typ) in argtypes
                        i = parse(Int, arg)
                        actual_typ = replace(typ, r"\bT\b" => T)
                        arg_exprs[i].args[2] = Meta.parse(actual_typ)
                    end
                end
            end

            # insert `@checked` before each function with a `ccall` returning a checked type`
            rettyp = call_expr.args[2]
            checked_types = if haskey(options, "api")
                get(options["api"], "checked_rettypes", String[])
            else
                String[]
            end
            if rettyp isa Symbol && String(rettyp) in checked_types
                node.exprs[1] = Expr(:macrocall, Symbol("@checked"), nothing, expr)
            end
        end
    end
end


#
# Main application
#

using oneAPI_Level_Zero_Headers_jll

# Wrap the Level Zero API and the oneAPI support library headers.
function main()
    wrap("ze", oneAPI_Level_Zero_Headers_jll.ze_api)
    wrap(
        "support",
        joinpath(dirname(@__DIR__), "deps", "src", "sycl.h"),
        joinpath(dirname(@__DIR__), "deps", "src", "onemkl.h"),
        joinpath(dirname(@__DIR__), "deps", "src", "onemkl_dft.h");
        dependents=false,
        include_dirs=[dirname(dirname(oneAPI_Level_Zero_Headers_jll.ze_api))]
    )
end

isinteractive() || main()

================================================ FILE: res/ze.toml ================================================

[general]
library_name = "libze_loader"
output_file_path = "../lib/level-zero/libze.jl"
prologue_file_path = "./libze_prologue.jl"

[codegen]
use_ccall_macro = true

[api]
checked_rettypes = [ "ze_result_t" ]

[api.zeCommandListAppendMemoryCopy.argtypes]
2 = "PtrOrZePtr{Cvoid}"
3 = "PtrOrZePtr{Cvoid}"
[api.zeCommandListAppendMemoryFill.argtypes]
2 = "PtrOrZePtr{Cvoid}"
3 = "PtrOrZePtr{Cvoid}"
[api.zeCommandListAppendMemoryCopyRegion.argtypes]
2 = "PtrOrZePtr{Cvoid}"
6 = "PtrOrZePtr{Cvoid}"
[api.zeCommandListAppendMemoryCopyFromContext.argtypes]
2 = "PtrOrZePtr{Cvoid}"
4 = "PtrOrZePtr{Cvoid}"
[api.zeCommandListAppendMemoryPrefetch.argtypes]
2 = "PtrOrZePtr{Cvoid}"
[api.zeCommandListAppendMemAdvise.argtypes]
3 = "PtrOrZePtr{Cvoid}"
[api.zeMemFree.argtypes]
2 = "PtrOrZePtr{Cvoid}"
[api.zeMemFreeExt.argtypes]
3 = "PtrOrZePtr{Cvoid}"
[api.zeMemGetAllocProperties.argtypes]
2 =
"PtrOrZePtr{Cvoid}"

# Remaining pointer-argument overrides: these Level Zero entry points take raw
# `void*` arguments that the wrapper rewrites to `PtrOrZePtr{Cvoid}` so both
# host and device pointers are accepted.
[api.zeMemGetAddressRange.argtypes]
2 = "PtrOrZePtr{Cvoid}"

[api.zeMemGetIpcHandle.argtypes]
2 = "PtrOrZePtr{Cvoid}"

[api.zeMemOpenIpcHandle.argtypes]
5 = "Ptr{PtrOrZePtr{Cvoid}}"

[api.zeMemCloseIpcHandle.argtypes]
2 = "PtrOrZePtr{Cvoid}"

[api.zeContextEvictMemory.argtypes]
3 = "PtrOrZePtr{Cvoid}"

[api.zeContextMakeMemoryResident.argtypes]
3 = "PtrOrZePtr{Cvoid}"

[api.zeVirtualMemFree.argtypes]
2 = "PtrOrZePtr{Cvoid}"

================================================ FILE: src/accumulate.jl ================================================

import oneAPI
import oneAPI: oneArray, oneAPIBackend
import AcceleratedKernels as AK

# Use a smaller block size on Intel GPUs to work around a scan correctness issue
# with the Blelloch parallel prefix sum at larger block sizes (>=128).
const _ACCUMULATE_BLOCK_SIZE = 64

# Accumulate operations using AcceleratedKernels.
# Each definition below simply forwards to the AcceleratedKernels implementation
# on the oneAPI backend, pinning `block_size` to the safe default above while
# still letting callers override it via the keyword argument.
Base.accumulate!(op, B::oneArray, A::oneArray; init = zero(eltype(A)),
                 block_size = _ACCUMULATE_BLOCK_SIZE, kwargs...) =
    AK.accumulate!(op, B, A, oneAPIBackend(); init, block_size, kwargs...)

Base.accumulate(op, A::oneArray; init = zero(eltype(A)),
                block_size = _ACCUMULATE_BLOCK_SIZE, kwargs...) =
    AK.accumulate(op, A, oneAPIBackend(); init, block_size, kwargs...)

Base.cumsum(src::oneArray; block_size = _ACCUMULATE_BLOCK_SIZE, kwargs...) =
    AK.cumsum(src, oneAPIBackend(); block_size, kwargs...)

Base.cumprod(src::oneArray; block_size = _ACCUMULATE_BLOCK_SIZE, kwargs...) =
    AK.cumprod(src, oneAPIBackend(); block_size, kwargs...)
================================================ FILE: src/array.jl ================================================

export oneArray, oneVector, oneMatrix, oneVecOrMat, is_device, is_shared, is_host


## array type

# Return whether `fieldcount` is defined for `dt`. `fieldcount` throws for some
# (abstract/incomplete) types, in which case we report `false`.
function hasfieldcount(@nospecialize(dt))
    try
        fieldcount(dt)
    catch
        return false
    end
    return true
end

# Recursively determine whether type `T` contains `X`: directly, as a member of
# a union, or as (a field of) one of its fields.
function contains_eltype(T, X)
    if T === X
        return true
    elseif T isa Union
        for U in Base.uniontypes(T)
            contains_eltype(U, X) && return true
        end
    elseif hasfieldcount(T)
        for U in fieldtypes(T)
            contains_eltype(U, X) && return true
        end
    end
    return false
end

# Validate that `T` is a supported oneArray element type on the current device:
# it must be stored inline, must not be an isbits union, and must not contain
# Float16/Float64 if the device's module properties lack the respective flags.
function check_eltype(T)
    Base.allocatedinline(T) || error("oneArray only supports element types that are stored inline")
    Base.isbitsunion(T) && error("oneArray does not yet support isbits-union arrays")

    # query the device's module properties once, instead of once per flag check
    props = oneL0.module_properties(device())
    if props.fp16flags & oneL0.ZE_DEVICE_MODULE_FLAG_FP16 != oneL0.ZE_DEVICE_MODULE_FLAG_FP16
        contains_eltype(T, Float16) && error("Float16 is not supported on this device")
    end
    if props.fp64flags & oneL0.ZE_DEVICE_MODULE_FLAG_FP64 != oneL0.ZE_DEVICE_MODULE_FLAG_FP64
        contains_eltype(T, Float64) && error("Float64 is not supported on this device")
    end
end

"""
    oneArray{T,N,B} <: AbstractGPUArray{T,N}

N-dimensional dense array type for Intel GPU programming using oneAPI and Level Zero.

# Type Parameters
- `T`: Element type (must be stored inline, no isbits-unions)
- `N`: Number of dimensions
- `B`: Buffer type, one of:
  - `oneL0.DeviceBuffer`: GPU device memory (default, not CPU-accessible)
  - `oneL0.SharedBuffer`: Unified shared memory (CPU and GPU accessible)
  - `oneL0.HostBuffer`: Pinned host memory (CPU-accessible, GPU-visible)

# Memory Types
- **Device memory** (default): Fastest GPU access, not directly accessible from CPU
- **Shared memory**: Accessible from both CPU and GPU, with unified virtual addressing
- **Host memory**: CPU memory that's visible to the GPU, useful for staging

Use [`is_device`](@ref), [`is_shared`](@ref), [`is_host`](@ref) to query memory type.

# Examples
```julia
# Create arrays with different memory types
A = oneArray{Float32,2}(undef, 10, 10)                     # Device memory (default)
B = oneArray{Float32,2,oneL0.SharedBuffer}(undef, 10, 10)  # Shared memory
C = oneArray{Float32,2,oneL0.HostBuffer}(undef, 10, 10)    # Host memory

# From existing array
D = oneArray(rand(Float32, 10, 10))  # Creates device memory array

# Using do-block for automatic cleanup
result = oneArray{Float32}(undef, 100) do arr
    # Use arr...
    Array(arr)  # Copy result back before cleanup
end
```

See also: [`oneVector`](@ref), [`oneMatrix`](@ref), [`is_device`](@ref), [`is_shared`](@ref)
"""
mutable struct oneArray{T,N,B} <: AbstractGPUArray{T,N}
    data::DataRef{B}

    maxsize::Int  # maximum data size; excluding any selector bytes
    offset::Int   # offset of the data in the buffer, in bytes

    dims::Dims{N}

    # allocating constructor: reserves a (possibly cached) buffer of the right
    # size on the current context/device and registers a finalizer.
    function oneArray{T,N,B}(::UndefInitializer, dims::Dims{N}) where {T,N,B}
        check_eltype(T)
        maxsize = prod(dims) * sizeof(T)
        bufsize = if Base.isbitsunion(T)
            # type tag array past the data
            maxsize + prod(dims)
        else
            maxsize
        end

        ctx = context()
        dev = device()
        alignment = Base.datatype_alignment(T)
        data = GPUArrays.cached_alloc((oneArray, B, ctx, dev, bufsize, alignment)) do
            buf = allocate(B, ctx, dev, bufsize, alignment)
            data = DataRef(buf) do buf
                release(buf)
            end
        end
        obj = new{T,N,B}(data, maxsize, 0, dims)
        finalizer(unsafe_free!, obj)
    end

    # aliasing constructor: wraps an existing buffer reference (shares ownership
    # by copying the DataRef), e.g. for derived arrays with an offset.
    function oneArray{T,N}(data::DataRef{B}, dims::Dims{N};
                           maxsize::Int=prod(dims) * sizeof(T), offset::Int=0) where {T,N,B}
        check_eltype(T)
        if sizeof(T) == 0
            offset == 0 || error("Singleton arrays cannot have a nonzero offset")
            maxsize == 0 || error("Singleton arrays cannot have a size")
        end
        obj = new{T,N,B}(copy(data), maxsize, offset, dims)
        finalizer(unsafe_free!, obj)
    end
end

GPUArrays.storage(a::oneArray) = a.data


## alias detection

Base.dataids(A::oneArray) = (UInt(pointer(A)),)

Base.unaliascopy(A::oneArray) = copy(A)

function Base.mightalias(A::oneArray, B::oneArray)
    rA = pointer(A):pointer(A)+sizeof(A)
    rB = pointer(B):pointer(B)+sizeof(B)
    return first(rA) <= first(rB) < last(rA) || first(rB) <= first(rA) < last(rB)
end


## convenience constructors

const oneVector{T} = oneArray{T,1}
const oneMatrix{T} = oneArray{T,2}
const oneVecOrMat{T} = Union{oneVector{T},oneMatrix{T}}

# default to non-unified memory
oneArray{T,N}(::UndefInitializer, dims::Dims{N}) where {T,N} =
    oneArray{T,N,oneL0.DeviceBuffer}(undef, dims)

# buffer, type and dimensionality specified
oneArray{T,N,B}(::UndefInitializer, dims::NTuple{N,Integer}) where {T,N,B} =
    oneArray{T,N,B}(undef, convert(Tuple{Vararg{Int}}, dims))
oneArray{T,N,B}(::UndefInitializer, dims::Vararg{Integer,N}) where {T,N,B} =
    oneArray{T,N,B}(undef, convert(Tuple{Vararg{Int}}, dims))

# type and dimensionality specified
oneArray{T,N}(::UndefInitializer, dims::NTuple{N,Integer}) where {T,N} =
    oneArray{T,N}(undef, convert(Tuple{Vararg{Int}}, dims))
oneArray{T,N}(::UndefInitializer, dims::Vararg{Integer,N}) where {T,N} =
    oneArray{T,N}(undef, convert(Tuple{Vararg{Int}}, dims))

# only type specified
oneArray{T}(::UndefInitializer, dims::NTuple{N,Integer}) where {T,N} =
    oneArray{T,N}(undef, convert(Tuple{Vararg{Int}}, dims))
oneArray{T}(::UndefInitializer, dims::Vararg{Integer,N}) where {T,N} =
    oneArray{T,N}(undef, convert(Tuple{Vararg{Int}}, dims))

# empty vector constructor
oneArray{T,1,B}() where {T,B} = oneArray{T,1,B}(undef, 0)
oneArray{T,1}() where {T} = oneArray{T,1}(undef, 0)

# do-block constructors: allocate, invoke `f`, and always free the array afterwards
for (ctor, tvars) in (:oneArray => (),
                      :(oneArray{T}) => (:T,),
                      :(oneArray{T,N}) => (:T, :N),
                      :(oneArray{T,N,B}) => (:T, :N, :B))
    @eval begin
        function $ctor(f::Function, args...) where {$(tvars...)}
            xs = $ctor(args...)
            try
                f(xs)
            finally
                unsafe_free!(xs)
            end
        end
    end
end

Base.similar(a::oneArray{T,N,B}) where {T,N,B} =
    oneArray{T,N,B}(undef, size(a))
Base.similar(a::oneArray{T,<:Any,B}, dims::Base.Dims{N}) where {T,N,B} =
    oneArray{T,N,B}(undef, dims)
Base.similar(a::oneArray{<:Any,<:Any,B}, ::Type{T}, dims::Base.Dims{N}) where {T,N,B} =
    oneArray{T,N,B}(undef, dims)

function Base.copy(a::oneArray{T,N}) where {T,N}
    b = similar(a)
    @inbounds copyto!(b, a)
end


## array interface

Base.elsize(::Type{<:oneArray{T}}) where {T} = sizeof(T)

Base.size(x::oneArray) = x.dims
Base.sizeof(x::oneArray) = Base.elsize(x) * length(x)

# context and device are properties of the underlying buffer
function context(A::oneArray)
    return oneL0.context(A.data[])
end

function device(A::oneArray)
    return oneL0.device(A.data[])
end

buftype(x::oneArray) = buftype(typeof(x))
buftype(::Type{<:oneArray{<:Any,<:Any,B}}) where {B} = @isdefined(B) ? B : Any

"""
    is_device(a::oneArray) -> Bool

Check if the array is stored in device memory (not directly CPU-accessible).

Device memory provides the fastest GPU access but cannot be directly accessed from the CPU.

See also: [`is_shared`](@ref), [`is_host`](@ref)
"""
is_device(a::oneArray) = isa(a.data[], oneL0.DeviceBuffer)

"""
    is_shared(a::oneArray) -> Bool

Check if the array is stored in shared (unified) memory.

Shared memory is accessible from both CPU and GPU with unified virtual addressing.

See also: [`is_device`](@ref), [`is_host`](@ref)
"""
is_shared(a::oneArray) = isa(a.data[], oneL0.SharedBuffer)

"""
    is_host(a::oneArray) -> Bool

Check if the array is stored in pinned host memory.

Host memory resides on the CPU but is visible to the GPU, useful for staging data.

See also: [`is_device`](@ref), [`is_shared`](@ref)
"""
is_host(a::oneArray) = isa(a.data[], oneL0.HostBuffer)


## derived types

export oneDenseArray, oneDenseVector, oneDenseMatrix, oneDenseVecOrMat,
       oneStridedArray, oneStridedVector, oneStridedMatrix, oneStridedVecOrMat,
       oneWrappedArray, oneWrappedVector, oneWrappedMatrix, oneWrappedVecOrMat

# dense arrays: stored contiguously in memory
#
# all common dense wrappers are currently represented as oneArray objects.
# this simplifies common use cases, and greatly improves load time.
const oneDenseArray{T,N} = oneArray{T,N}
const oneDenseVector{T} = oneDenseArray{T,1}
const oneDenseMatrix{T} = oneDenseArray{T,2}
const oneDenseVecOrMat{T} = Union{oneDenseVector{T}, oneDenseMatrix{T}}
# XXX: these dummy aliases (oneDenseArray=oneArray) break alias printing, as
#      `Base.print_without_params` only handles the case of a single alias.

# strided arrays
const oneStridedSubArray{T,N,I<:Tuple{Vararg{Union{Base.RangeIndex, Base.ReshapedUnitRange,
                                                   Base.AbstractCartesianIndex}}}} =
    SubArray{T,N,<:oneArray,I}
const oneStridedArray{T,N} = Union{oneArray{T,N}, oneStridedSubArray{T,N}}
const oneStridedVector{T} = oneStridedArray{T,1}
const oneStridedMatrix{T} = oneStridedArray{T,2}
const oneStridedVecOrMat{T} = Union{oneStridedVector{T}, oneStridedMatrix{T}}

# Return a device (`ZePtr`) or host (`Ptr`) pointer to element `i`, depending on
# the requested `type`.
@inline function Base.pointer(x::oneStridedArray{T}, i::Integer=1; type=oneL0.DeviceBuffer) where T
    PT = if type == oneL0.DeviceBuffer
        ZePtr{T}
    elseif type == oneL0.HostBuffer
        Ptr{T}
    else
        error("unknown memory type")
    end
    Base.unsafe_convert(PT, x) + Base._memory_offset(x, i)
end

# anything that's (secretly) backed by a oneArray
const oneWrappedArray{T,N} = Union{oneArray{T,N}, WrappedArray{T,N,oneArray,oneArray{T,N}}}
const oneWrappedVector{T} = oneWrappedArray{T,1}
const oneWrappedMatrix{T} = oneWrappedArray{T,2}
const oneWrappedVecOrMat{T} = Union{oneWrappedVector{T}, oneWrappedMatrix{T}}


## interop with other arrays

@inline function oneArray{T,N,B}(xs::AbstractArray{<:Any,N}) where {T,N,B}
    A = oneArray{T,N,B}(undef, size(xs))
    copyto!(A, convert(Array{T}, xs))
    return A
end

@inline oneArray{T,N}(xs::AbstractArray{<:Any,N}) where {T,N} =
    oneArray{T,N,oneL0.DeviceBuffer}(xs)

@inline oneArray{T,N}(xs::oneArray{<:Any,N,B}) where {T,N,B} =
    oneArray{T,N,B}(xs)

# underspecified constructors
oneArray{T}(xs::AbstractArray{S,N}) where {T,N,S} = oneArray{T,N}(xs)
(::Type{oneArray{T,N} where T})(x::AbstractArray{S,N}) where {S,N} = oneArray{S,N}(x)
oneArray(A::AbstractArray{T,N}) where {T,N} = oneArray{T,N}(A)

# idempotency
oneArray{T,N,B}(xs::oneArray{T,N,B}) where {T,N,B} = xs
oneArray{T,N}(xs::oneArray{T,N,B}) where {T,N,B} = xs

# Level Zero references
oneL0.ZeRef(x::Any) = oneL0.ZeRefArray(oneArray([x]))
oneL0.ZeRef{T}(x) where {T} = oneL0.ZeRefArray{T}(oneArray(T[x]))
oneL0.ZeRef{T}() where {T} = oneL0.ZeRefArray(oneArray{T}(undef, 1))


## conversions

Base.convert(::Type{T}, x::T) where T <: oneArray = x


## interop with libraries

# CPU pointer: only valid for host-accessible buffers
function Base.unsafe_convert(::Type{Ptr{T}}, x::oneArray{T}) where {T}
    # NOTE(review): `buf` is unused; the conversion below re-reads `x.data[]`
    buf = x.data[]
    if is_device(x)
        throw(ArgumentError("cannot take the CPU address of a $(typeof(x))"))
    end
    convert(Ptr{T}, x.data[]) + x.offset
end

function Base.unsafe_convert(::Type{ZePtr{T}}, x::oneArray{T}) where {T}
    convert(ZePtr{T}, x.data[]) + x.offset
end


## indexing

# Host-accessible arrays can be indexed from CPU, bypassing GPUArrays restrictions
function Base.getindex(x::oneArray{<:Any, <:Any, <:Union{oneL0.HostBuffer, oneL0.SharedBuffer}}, I::Int)
    @boundscheck checkbounds(x, I)
    return unsafe_load(pointer(x, I; type = oneL0.HostBuffer))
end

function Base.setindex!(x::oneArray{<:Any, <:Any, <:Union{oneL0.HostBuffer, oneL0.SharedBuffer}}, v, I::Int)
    @boundscheck checkbounds(x, I)
    return unsafe_store!(pointer(x, I; type = oneL0.HostBuffer), v)
end


## interop with GPU arrays

# construct the device-side counterpart passed to kernels
function Base.unsafe_convert(::Type{oneDeviceArray{T,N,AS.CrossWorkgroup}}, a::oneArray{T,N}) where {T,N}
    oneDeviceArray{T,N,AS.CrossWorkgroup}(size(a),
                                          reinterpret(LLVMPtr{T,AS.CrossWorkgroup}, pointer(a)),
                                          a.maxsize - a.offset)
end


## memory copying

typetagdata(a::Array, i=1) = ccall(:jl_array_typetagdata, Ptr{UInt8}, (Any,), a) + i - 1
function typetagdata(a::oneArray, i=1)
    # for zero-size element types (e.g. singleton unions), the byte offset
    # is always zero, so the corresponding element offset is also zero
    elem_offset = iszero(Base.elsize(a)) ? 0 : a.offset ÷ Base.elsize(a)
    return convert(ZePtr{UInt8}, a.data[]) + a.maxsize + elem_offset + i - 1
end

# host Array -> oneArray
function Base.copyto!(dest::oneArray{T}, doffs::Integer, src::Array{T}, soffs::Integer,
                      n::Integer) where T
    n==0 && return dest
    @boundscheck checkbounds(dest, doffs)
    @boundscheck checkbounds(dest, doffs+n-1)
    @boundscheck checkbounds(src, soffs)
    @boundscheck checkbounds(src, soffs+n-1)
    unsafe_copyto!(context(dest), device(), dest, doffs, src, soffs, n)
    return dest
end

Base.copyto!(dest::oneDenseArray{T}, src::Array{T}) where {T} =
    copyto!(dest, 1, src, 1, length(src))

# oneArray -> host Array
function Base.copyto!(dest::Array{T}, doffs::Integer, src::oneDenseArray{T}, soffs::Integer,
                      n::Integer) where T
    n==0 && return dest
    @boundscheck checkbounds(dest, doffs)
    @boundscheck checkbounds(dest, doffs+n-1)
    @boundscheck checkbounds(src, soffs)
    @boundscheck checkbounds(src, soffs+n-1)
    unsafe_copyto!(context(src), device(), dest, doffs, src, soffs, n)
    return dest
end

Base.copyto!(dest::Array{T}, src::oneDenseArray{T}) where {T} =
    copyto!(dest, 1, src, 1, length(src))

# oneArray -> oneArray (same context required)
function Base.copyto!(dest::oneDenseArray{T}, doffs::Integer, src::oneDenseArray{T}, soffs::Integer,
                      n::Integer) where T
    n==0 && return dest
    @boundscheck checkbounds(dest, doffs)
    @boundscheck checkbounds(dest, doffs+n-1)
    @boundscheck checkbounds(src, soffs)
    @boundscheck checkbounds(src, soffs+n-1)
    @assert context(dest) == context(src)
    unsafe_copyto!(context(dest), device(), dest, doffs, src, soffs, n)
    return dest
end

Base.copyto!(dest::oneDenseArray{T}, src::oneDenseArray{T}) where {T} =
    copyto!(dest, 1, src, 1, length(src))

function Base.unsafe_copyto!(ctx::ZeContext, dev::ZeDevice,
                             dest::oneDenseArray{T}, doffs, src::Array{T}, soffs, n) where T
    GC.@preserve src dest unsafe_copyto!(ctx, dev, pointer(dest, doffs), pointer(src, soffs), n)
    if Base.isbitsunion(T)
        # copy selector bytes
        error("oneArray does not yet support isbits-union arrays")
    end
    return dest
end

function Base.unsafe_copyto!(ctx::ZeContext, dev::ZeDevice,
                             dest::Array{T}, doffs, src::oneDenseArray{T}, soffs, n) where T
    GC.@preserve src dest unsafe_copyto!(ctx, dev, pointer(dest, doffs), pointer(src, soffs), n)
    if Base.isbitsunion(T)
        # copy selector bytes
        error("oneArray does not yet support isbits-union arrays")
    end

    # copies to the host are synchronizing
    synchronize(global_queue(context(src), device()))

    return dest
end

function Base.unsafe_copyto!(ctx::ZeContext, dev::ZeDevice,
                             dest::oneDenseArray{T}, doffs, src::oneDenseArray{T}, soffs, n) where T
    GC.@preserve src dest unsafe_copyto!(ctx, dev, pointer(dest, doffs), pointer(src, soffs), n)
    if Base.isbitsunion(T)
        # copy selector bytes
        error("oneArray does not yet support isbits-union arrays")
    end
    return dest
end

# between Array and host-accessible oneArray

function Base.unsafe_copyto!(ctx::ZeContext, dev::ZeDevice,
                             dest::oneDenseArray{T,<:Any,<:Union{oneL0.SharedBuffer,oneL0.HostBuffer}},
                             doffs, src::Array{T}, soffs, n) where T
    # maintain queue-ordered semantics
    synchronize(global_queue(ctx, dev))

    if Base.isbitsunion(T)
        # copy selector bytes
        error("oneArray does not yet support isbits-union arrays")
    end
    GC.@preserve src dest begin
        # NOTE(review): `ptr` is unused, and the isbits-union check below
        # duplicates the one above
        ptr = pointer(dest, doffs)
        unsafe_copyto!(pointer(dest, doffs; type=oneL0.HostBuffer), pointer(src, soffs), n)
        if Base.isbitsunion(T)
            # copy selector bytes
            error("oneArray does not yet support isbits-union arrays")
        end
    end

    return dest
end

function Base.unsafe_copyto!(ctx::ZeContext, dev::ZeDevice, dest::Array{T}, doffs,
                             src::oneDenseArray{T,<:Any,<:Union{oneL0.SharedBuffer,oneL0.HostBuffer}},
                             soffs, n) where T
    # maintain queue-ordered semantics
    synchronize(global_queue(ctx, dev))

    if Base.isbitsunion(T)
        # copy selector bytes
        error("oneArray does not yet support isbits-union arrays")
    end
    GC.@preserve src dest begin
        # NOTE(review): `ptr` is unused, and the isbits-union check below
        # duplicates the one above
        ptr = pointer(dest, doffs)
        unsafe_copyto!(pointer(dest, doffs), pointer(src, soffs; type=oneL0.HostBuffer), n)
        if Base.isbitsunion(T)
            # copy selector bytes
            error("oneArray does not yet support isbits-union arrays")
        end
    end

    return dest
end


## gpu array adaptor

# We don't convert isbits types in `adapt`, since they are already
# considered GPU-compatible.

Adapt.adapt_storage(::Type{oneArray}, xs::AT) where {AT<:AbstractArray} =
    isbitstype(AT) ? xs : convert(oneArray, xs)

# if an element type is specified, convert to it
Adapt.adapt_storage(::Type{<:oneArray{T}}, xs::AT) where {T, AT<:AbstractArray} =
    isbitstype(AT) ? xs : convert(oneArray{T}, xs)


## utilities

zeros(T::Type, dims...) = fill!(oneArray{T}(undef, dims...), zero(T))
ones(T::Type, dims...) = fill!(oneArray{T}(undef, dims...), one(T))
zeros(dims...) = zeros(Float64, dims...)
ones(dims...) = ones(Float64, dims...)
fill(v, dims...) = fill!(oneArray{typeof(v)}(undef, dims...), v)
fill(v, dims::Dims) = fill!(oneArray{typeof(v)}(undef, dims...), v)

function Base.fill!(A::oneDenseArray{T}, val) where T
    length(A) == 0 && return A
    val = convert(T, val)
    sizeof(T) == 0 && return A

    # execute! is async, so we need to allocate the pattern in USM memory
    # and keep it alive until the operation completes.
    buf = oneL0.host_alloc(context(A), sizeof(T), Base.datatype_alignment(T))
    unsafe_store!(convert(Ptr{T}, buf), val)
    unsafe_fill!(context(A), device(), pointer(A), convert(ZePtr{T}, buf), length(A))
    synchronize(global_queue(context(A), device()))
    oneL0.free(buf)
    A
end


## derived arrays

# Create an array view reusing `a`'s buffer, with new eltype/dims and a
# byte-level offset derived from the element-level `offset`.
function GPUArrays.derive(::Type{T}, a::oneArray, dims::Dims{N}, offset::Int) where {T,N}
    if sizeof(T) == 0
        Base.elsize(a) == 0 || error("Cannot derive a singleton array from non-singleton inputs")
    end
    offset = a.offset + offset * sizeof(T)
    oneArray{T,N}(a.data, dims; a.maxsize, offset)
end


## views

device(a::SubArray) = device(parent(a))
context(a::SubArray) = context(parent(a))

# pointer conversions
function Base.unsafe_convert(::Type{ZePtr{T}}, V::SubArray{T,N,P,<:Tuple{Vararg{Base.RangeIndex}}}) where {T,N,P}
    return Base.unsafe_convert(ZePtr{T}, parent(V)) +
           Base._memory_offset(V.parent, map(first, V.indices)...)
end
function Base.unsafe_convert(::Type{ZePtr{T}}, V::SubArray{T,N,P,<:Tuple{Vararg{Union{Base.RangeIndex,Base.ReshapedUnitRange}}}}) where {T,N,P}
    return Base.unsafe_convert(ZePtr{T}, parent(V)) +
           (Base.first_index(V)-1)*sizeof(T)
end


## PermutedDimsArray

device(a::Base.PermutedDimsArray) = device(parent(a))
context(a::Base.PermutedDimsArray) = context(parent(a))

Base.unsafe_convert(::Type{ZePtr{T}}, A::PermutedDimsArray) where {T} =
    Base.unsafe_convert(ZePtr{T}, parent(A))


## unsafe_wrap

"""
    unsafe_wrap(Array, arr::oneArray{_,_,oneL0.SharedBuffer})

Wrap a Julia `Array` around the buffer that backs a `oneArray`. This is only possible if the
GPU array is backed by a shared buffer, i.e. if it was created as a
`oneArray{T,N,oneL0.SharedBuffer}`.
"""
function Base.unsafe_wrap(::Type{Array}, arr::oneArray{T,N,oneL0.SharedBuffer}) where {T,N}
    # TODO: can we make this more convenient by increasing the buffer's refcount and using
    #       a finalizer on the Array? does that work when taking views etc of the Array?
    ptr = reinterpret(Ptr{T}, pointer(arr))
    unsafe_wrap(Array, ptr, size(arr))
end


## resizing

"""
    resize!(a::oneVector, n::Integer)

Resize `a` to contain `n` elements. If `n` is smaller than the current collection length,
the first `n` elements will be retained. If `n` is larger, the new elements are not
guaranteed to be initialized.
"""
function Base.resize!(a::oneVector{T}, n::Integer) where {T}
    # TODO: add additional space to allow for quicker resizing
    maxsize = n * sizeof(T)
    bufsize = if isbitstype(T)
        maxsize
    else
        # type tag array past the data
        maxsize + n
    end

    # replace the data with a new one. this 'unshares' the array.
    # as a result, we can safely support resizing unowned buffers.
    ctx = context(a)
    dev = device(a)
    buf = allocate(buftype(a), ctx, dev, bufsize, Base.datatype_alignment(T))
    ptr = convert(ZePtr{T}, buf)
    m = min(length(a), n)
    if m > 0
        unsafe_copyto!(ctx, dev, ptr, pointer(a), m)
    end
    new_data = DataRef(buf) do buf
        free(buf)
    end
    unsafe_free!(a)

    a.data = new_data
    a.dims = (n,)
    a.maxsize = maxsize
    a.offset = 0

    a
end

================================================ FILE: src/broadcast.jl ================================================

import Base.Broadcast: BroadcastStyle, Broadcasted

struct oneArrayStyle{N,B} <: AbstractGPUArrayStyle{N} end
oneArrayStyle{M,B}(::Val{N}) where {N,M,B} = oneArrayStyle{N,B}()

# identify the broadcast style of a (wrapped) oneArray
BroadcastStyle(::Type{<:oneArray{T, N, B}}) where {T, N, B} = oneArrayStyle{N, B}()
BroadcastStyle(W::Type{<:oneWrappedArray{T, N}}) where {T, N} =
    oneArrayStyle{N, buftype(Adapt.unwrap_type(W))}()

# when we are dealing with different buffer styles, we cannot know
# which one is better, so use shared memory
BroadcastStyle(
    ::oneArrayStyle{N, B1},
    ::oneArrayStyle{N, B2},
) where {N,B1,B2} = oneArrayStyle{N, oneL0.SharedBuffer}()

# allocation of output arrays
Base.similar(bc::Broadcasted{oneArrayStyle{N,B}}, ::Type{T}, dims) where {T,N,B} =
    similar(oneArray{T,length(dims),B}, dims)
================================================ FILE: src/compiler/compilation.jl ================================================ ## gpucompiler interface implementation struct oneAPICompilerParams <: AbstractCompilerParams end const oneAPICompilerConfig = CompilerConfig{SPIRVCompilerTarget, oneAPICompilerParams} const oneAPICompilerJob = CompilerJob{SPIRVCompilerTarget,oneAPICompilerParams} GPUCompiler.runtime_module(::oneAPICompilerJob) = oneAPI GPUCompiler.method_table_view(job::oneAPICompilerJob) = GPUCompiler.StackedMethodTable(job.world, method_table, SPIRVIntrinsics.method_table) # filter out OpenCL built-ins # TODO: eagerly lower these using the translator API GPUCompiler.isintrinsic(job::oneAPICompilerJob, fn::String) = invoke(GPUCompiler.isintrinsic, Tuple{CompilerJob{SPIRVCompilerTarget}, typeof(fn)}, job, fn) || in(fn, known_intrinsics) || contains(fn, "__spirv_") function GPUCompiler.finish_module!(job::oneAPICompilerJob, mod::LLVM.Module, entry::LLVM.Function) entry = invoke(GPUCompiler.finish_module!, Tuple{CompilerJob{SPIRVCompilerTarget}, typeof(mod), typeof(entry)}, job, mod, entry) # OpenCL 2.0 push!(metadata(mod)["opencl.ocl.version"], MDNode([ConstantInt(Int32(2)), ConstantInt(Int32(0))])) # SPIR-V 1.5 push!(metadata(mod)["opencl.spirv.version"], MDNode([ConstantInt(Int32(1)), ConstantInt(Int32(5))])) return entry end # finish_ir! runs later in the pipeline, after optimizations that create nested insertvalue function GPUCompiler.finish_ir!(job::oneAPICompilerJob, mod::LLVM.Module, entry::LLVM.Function) entry = invoke(GPUCompiler.finish_ir!, Tuple{CompilerJob{SPIRVCompilerTarget}, typeof(mod), typeof(entry)}, job, mod, entry) # FIX: Flatten nested insertvalue instructions to work around SPIR-V bug # See: https://github.com/JuliaGPU/oneAPI.jl/issues/259 # Intel's SPIR-V runtime has a bug where OpCompositeInsert with nested # indices (e.g., "1 0") corrupts adjacent struct fields. 
flatten_nested_insertvalue!(mod) return entry end # Flatten nested insertvalue instructions # This works around a bug in Intel's SPIR-V runtime where OpCompositeInsert # with nested array indices corrupts adjacent struct fields. function flatten_nested_insertvalue!(mod::LLVM.Module) changed = false count = 0 for f in functions(mod) isempty(blocks(f)) && continue for bb in blocks(f) # Collect instructions to process (can't modify while iterating) to_process = LLVM.Instruction[] for inst in instructions(bb) # Check if this is an insertvalue with nested indices if LLVM.API.LLVMGetInstructionOpcode(inst) == LLVM.API.LLVMInsertValue num_indices = LLVM.API.LLVMGetNumIndices(inst) if num_indices > 1 push!(to_process, inst) end end end # Flatten each nested insertvalue for inst in to_process try flatten_insert!(inst) changed = true count += 1 catch e @warn "Failed to flatten nested insertvalue" exception=(e, catch_backtrace()) end end end end return changed end function flatten_insert!(inst::LLVM.Instruction) # Transform: insertvalue %base, %val, i, j, k... # Into: extractvalue %base, i # insertvalue %extracted, %val, j, k... # insertvalue %base, %modified, i composite = LLVM.operands(inst)[1] value = LLVM.operands(inst)[2] num_indices = LLVM.API.LLVMGetNumIndices(inst) idx_ptr = LLVM.API.LLVMGetIndices(inst) indices = unsafe_wrap(Array, idx_ptr, num_indices) builder = LLVM.IRBuilder() LLVM.position!(builder, inst) # Strategy: Recursively extract and insert for each nesting level # For insertvalue %base, %val, i, j, k # Do: %tmp1 = extractvalue %base, i # %tmp2 = extractvalue %tmp1, j # %tmp3 = insertvalue %tmp2, %val, k # %tmp4 = insertvalue %tmp1, %tmp3, j # %result = insertvalue %base, %tmp4, i # But that's complex. 
Simpler approach for 2-3 levels: # Just do one level of flattening at a time first_idx = indices[1] rest_indices = indices[2:end] # Extract the first level extracted = LLVM.extract_value!(builder, composite, first_idx) # Now insert into the extracted value using remaining indices # The LLVM IR builder will handle this correctly inserted = extracted if length(rest_indices) == 1 # Simple case: just one more level inserted = LLVM.insert_value!(builder, extracted, value, rest_indices[1]) else # Multiple levels: need to extract down, insert, then insert back up # For now, recursively extract to the deepest level temps = [extracted] for i in 1:(length(rest_indices)-1) temp = LLVM.extract_value!(builder, temps[end], rest_indices[i]) push!(temps, temp) end # Insert the value at the deepest level inserted = LLVM.insert_value!(builder, temps[end], value, rest_indices[end]) # Insert back up the chain for i in (length(rest_indices)-1):-1:1 inserted = LLVM.insert_value!(builder, temps[i], inserted, rest_indices[i]) end end # Insert the modified structure back into the original result = LLVM.insert_value!(builder, composite, inserted, first_idx) LLVM.replace_uses!(inst, result) LLVM.API.LLVMInstructionEraseFromParent(inst) LLVM.dispose(builder) end ## compiler implementation (cache, configure, compile, and link) # cache of compilation caches, per device const _compiler_caches = Dict{ZeDevice, Dict{Any, Any}}() function compiler_cache(dev::ZeDevice) cache = get(_compiler_caches, dev, nothing) if cache === nothing cache = Dict{Any, Any}() _compiler_caches[dev] = cache end return cache end # cache of compiler configurations, per device (but additionally configurable via kwargs) const _toolchain = Ref{Any}() const _compiler_configs = Dict{UInt, oneAPICompilerConfig}() function compiler_config(dev; kwargs...) h = hash(dev, hash(kwargs)) config = get(_compiler_configs, h, nothing) if config === nothing config = _compiler_config(dev; kwargs...) 
_compiler_configs[h] = config end return config end @noinline function _compiler_config(dev; kernel=true, name=nothing, always_inline=false, kwargs...) supports_fp16 = oneL0.module_properties(device()).fp16flags & oneL0.ZE_DEVICE_MODULE_FLAG_FP16 == oneL0.ZE_DEVICE_MODULE_FLAG_FP16 supports_fp64 = oneL0.module_properties(device()).fp64flags & oneL0.ZE_DEVICE_MODULE_FLAG_FP64 == oneL0.ZE_DEVICE_MODULE_FLAG_FP64 # TODO: emit printf format strings in constant memory extensions = String[ "SPV_EXT_relaxed_printf_string_address_space", "SPV_EXT_shader_atomic_float_add" ] # create GPUCompiler objects target = SPIRVCompilerTarget(; extensions, supports_fp16, supports_fp64, kwargs...) params = oneAPICompilerParams() CompilerConfig(target, params; kernel, name, always_inline) end # compile to executable machine code function compile(@nospecialize(job::CompilerJob)) # TODO: on 1.9, this actually creates a context. cache those. asm, meta = JuliaContext() do ctx GPUCompiler.compile(:obj, job) end (image=asm, entry=LLVM.name(meta.entry)) end # link into an executable kernel function link(@nospecialize(job::CompilerJob), compiled) ctx = context() dev = device() mod = ZeModule(ctx, dev, compiled.image) kernels(mod)[compiled.entry] end ================================================ FILE: src/compiler/execution.jl ================================================ export @oneapi, zefunction, kernel_convert ## high-level @oneapi interface const MACRO_KWARGS = [:launch] const COMPILER_KWARGS = [:kernel, :name, :always_inline] const LAUNCH_KWARGS = [:groups, :items, :queue] """ @oneapi [kwargs...] kernel(args...) High-level interface for launching Julia kernels on Intel GPUs using oneAPI. This macro compiles a Julia function to SPIR-V, prepares the arguments, and optionally launches the kernel on the GPU. # Keyword Arguments ## Macro Keywords (compile-time) - `launch::Bool=true`: Whether to launch the kernel immediately. 
If `false`, returns the compiled kernel object without executing it.

## Compiler Keywords
- `kernel::Bool=false`: Whether to compile as a kernel (true) or device function (false)
- `name::Union{String,Nothing}=nothing`: Explicit name for the kernel
- `always_inline::Bool=false`: Whether to always inline device functions

## Launch Keywords (runtime)
- `groups`: Number of workgroups (required). Can be an integer or tuple.
- `items`: Number of work-items per workgroup (required). Can be an integer or tuple.
- `queue::ZeCommandQueue=global_queue(...)`: Command queue to submit to.

# Examples
```julia
# Simple vector addition kernel
function vadd(a, b, c)
    i = get_global_id()
    @inbounds c[i] = a[i] + b[i]
    return
end

a = oneArray(rand(Float32, 1024))
b = oneArray(rand(Float32, 1024))
c = similar(a)

# Launch with 4 workgroups of 256 items each
@oneapi groups=4 items=256 vadd(a, b, c)

# Compile without launching
kernel = @oneapi launch=false vadd(a, b, c)
kernel(a, b, c; groups=4, items=256)  # Launch later
```

See also: `zefunction`, `kernel_convert`
"""
macro oneapi(ex...)
    # the last expression is the kernel call; everything before it is keyword arguments
    call = ex[end]
    kwargs = map(ex[1:end-1]) do kwarg
        if kwarg isa Symbol
            :($kwarg = $kwarg)
        elseif Meta.isexpr(kwarg, :(=))
            kwarg
        else
            throw(ArgumentError("Invalid keyword argument '$kwarg'"))
        end
    end

    # destructure the kernel call
    Meta.isexpr(call, :call) || throw(ArgumentError("second argument to @oneapi should be a function call"))
    f = call.args[1]
    args = call.args[2:end]

    code = quote end
    vars, var_exprs = assign_args!(code, args)

    # group keyword argument
    macro_kwargs, compiler_kwargs, call_kwargs, other_kwargs =
        split_kwargs(kwargs, MACRO_KWARGS, COMPILER_KWARGS, LAUNCH_KWARGS)
    if !isempty(other_kwargs)
        key,val = first(other_kwargs).args
        throw(ArgumentError("Unsupported keyword argument '$key'"))
    end

    # handle keyword arguments that influence the macro's behavior
    launch = true
    for kwarg in macro_kwargs
        key,val = kwarg.args
        if key == :launch
            # BUGFIX: error message used to reference `@cuda` (copied from CUDA.jl)
            isa(val, Bool) || throw(ArgumentError("`launch` keyword argument to @oneapi should be a constant value"))
            launch = val::Bool
        else
            throw(ArgumentError("Unsupported keyword argument '$key'"))
        end
    end
    if !launch && !isempty(call_kwargs)
        error("@oneapi with launch=false does not support launch-time keyword arguments; use them when calling the kernel")
    end

    # FIXME: macro hygiene wrt. escaping kwarg values (this broke with 1.5)
    # we esc() the whole thing now, necessitating gensyms...
    @gensym f_var kernel_f kernel_args kernel_tt kernel

    # convert the arguments, call the compiler and launch the kernel
    # while keeping the original arguments alive
    push!(code.args,
        quote
            $f_var = $f
            GC.@preserve $(vars...) $f_var begin
                $kernel_f = $kernel_convert($f_var)
                $kernel_args = map($kernel_convert, ($(var_exprs...),))
                $kernel_tt = Tuple{map(Core.Typeof, $kernel_args)...}
                $kernel = $zefunction($kernel_f, $kernel_tt; $(compiler_kwargs...))
                if $launch
                    $kernel($(var_exprs...); $(call_kwargs...))
                end
                $kernel
            end
        end)

    return esc(quote
        let
            $code
        end
    end)
end


## argument conversion

struct KernelAdaptor end

# convert oneAPI host pointers to device pointers
Adapt.adapt_storage(to::KernelAdaptor, p::ZePtr{T}) where {T} = reinterpret(Ptr{T}, p)

# convert oneAPI host arrays to device arrays
Adapt.adapt_storage(::KernelAdaptor, xs::oneArray{T,N}) where {T,N} =
    Base.unsafe_convert(oneDeviceArray{T,N,AS.CrossWorkgroup}, xs)

# Base.RefValue isn't GPU compatible, so provide a compatible alternative.
# TODO: port improvements from CUDA.jl
struct ZeRefValue{T} <: Ref{T}
    x::T
end
Base.getindex(r::ZeRefValue) = r.x
Adapt.adapt_structure(to::KernelAdaptor, r::Base.RefValue) = ZeRefValue(adapt(to, r[]))

# broadcast sometimes passes a ref(type), resulting in a GPU-incompatible DataType box.
# avoid that by using a special kind of ref that knows about the boxed type.
struct oneRefType{T} <: Ref{DataType} end
Base.getindex(r::oneRefType{T}) where T = T
Adapt.adapt_structure(to::KernelAdaptor, r::Base.RefValue{<:Union{DataType,Type}}) =
    oneRefType{r[]}()

# case where type is the function being broadcasted
Adapt.adapt_structure(to::KernelAdaptor,
                      bc::Broadcast.Broadcasted{Style, <:Any, Type{T}}) where {Style, T} =
    Broadcast.Broadcasted{Style}((x...) -> T(x...), adapt(to, bc.args), bc.axes)

"""
    kernel_convert(x)

This function is called for every argument to be passed to a kernel, allowing it to be
converted to a GPU-friendly format. By default, the function does nothing and returns the
input object `x` as-is.

Do not add methods to this function, but instead extend the underlying Adapt.jl package
and register methods for the `oneAPI.KernelAdaptor` type.
"""
kernel_convert(arg) = adapt(KernelAdaptor(), arg)


## abstract kernel functionality

abstract type AbstractKernel{F,TT} end

# Generated call entry point: computes, at compile time, which arguments actually need to
# be passed to the device (dropping ghost/constant types, and substituting a null pointer
# for non-isbits arguments), then forwards to `onecall`.
@inline @generated function call(kernel::AbstractKernel{F,TT}, args...; call_kwargs...) where {F,TT}
    sig = Tuple{F, TT.parameters...}    # Base.signature_type with a function type
    args = (:(kernel.f), (:( args[$i] ) for i in 1:length(args))...)

    # filter out ghost arguments that shouldn't be passed
    predicate = dt -> isghosttype(dt) || Core.Compiler.isconstType(dt)
    to_pass = map(!predicate, sig.parameters)
    call_t = Type[x[1] for x in zip(sig.parameters, to_pass) if x[2]]
    call_args = Union{Expr,Symbol}[x[1] for x in zip(args, to_pass) if x[2]]

    # replace non-isbits arguments (they should be unused, or compilation would have failed)
    for (i,dt) in enumerate(call_t)
        if !isbitstype(dt)
            call_t[i] = Ptr{Any}
            call_args[i] = :C_NULL
        end
    end

    # finalize types
    call_tt = Base.to_tuple_type(call_t)

    quote
        onecall(kernel.fun, $call_tt, $(call_args...); call_kwargs...)
    end
end


## host-side kernels

struct HostKernel{F,TT} <: AbstractKernel{F,TT}
    f::F
    fun::ZeKernel
end

function launch_configuration(kernel::HostKernel{F,TT}) where {F,TT}
    # Level Zero's zeKernelSuggestGroupSize provides a launch configuration
    # that exactly cover the input size. This can result in very awkward
    # configurations, so roll our own version that behaves like CUDA's
    # occupancy API and assumes the kernel still does bounds checking.
    kernel_props = oneL0.properties(kernel.fun)
    group_size = if kernel_props.maxGroupSize !== missing
        kernel_props.maxGroupSize
    else
        # without the MAX_GROUP_SIZE extension, we need to be conservative
        dev = kernel.fun.mod.device
        compute_props = oneL0.compute_properties(dev)
        max_size = compute_props.maxTotalGroupSize

        ## when the kernel uses many registers (which we can't query without
        ## extensions that landed _after_ MAX_GROUP_SIZE, so don't bother)
        ## the groupsize should be halved
        group_size = max_size ÷ 2
    end

    # TODO: align the group size based on preferredGroupSize
    return group_size
end


## host-side API

# serializes compilation and kernel-instance cache access across tasks
const zefunction_lock = ReentrantLock()

function zefunction(f::F, tt::TT=Tuple{}; kwargs...) where {F,TT}
    dev = device()
    Base.@lock zefunction_lock begin
        # compile the function
        cache = compiler_cache(dev)
        source = methodinstance(F, tt)
        config = compiler_config(dev; kwargs...)::oneAPICompilerConfig
        fun = GPUCompiler.cached_compilation(cache, source, config, compile, link)

        # create a callable object that captures the function instance. we don't need to think
        # about world age here, as GPUCompiler already does and will return a different object
        h = hash(fun, hash(f, hash(tt)))
        kernel = get(_kernel_instances, h, nothing)
        if kernel === nothing
            # create the kernel state object
            kernel = HostKernel{F,tt}(f, fun)
            _kernel_instances[h] = kernel
        end
        return kernel::HostKernel{F,tt}
    end
end

# cache of kernel instances
const _kernel_instances = Dict{UInt, Any}()

@inline function onecall(kernel::ZeKernel, tt, args...; groups::ZeDim=1, items::ZeDim=1,
                         queue::ZeCommandQueue=global_queue(context(), device()))
    # bind each argument to the kernel object before launching
    for (i, arg) in enumerate(args)
        oneL0.arguments(kernel)[i] = arg
    end

    groupsize!(kernel, items)
    execute!(queue) do list
        append_launch!(list, kernel, groups)
    end
end

# make HostKernel objects callable: convert arguments and dispatch to `call`
function (kernel::HostKernel)(args...; kwargs...)
    call(kernel, map(kernel_convert, args)...; kwargs...)
end


## TODO: device-side kernels

================================================ FILE: src/compiler/reflection.jl ================================================
# code reflection entry-points

# TODO: get and disassemble the native binary using oneL0

#
# code_* replacements
#

# function to split off certain kwargs for selective forwarding, at run time.
# `@oneapi` does something similar at parse time, using `GPUCompiler.split_kwargs`.
function split_kwargs_runtime(kwargs, wanted::Vector{Symbol})
    remaining = Dict{Symbol, Any}()
    extracted = Dict{Symbol, Any}()
    for (key, value) in kwargs
        if key in wanted
            extracted[key] = value
        else
            remaining[key] = value
        end
    end
    return extracted, remaining
end

for method in (:code_typed, :code_warntype, :code_llvm, :code_native)
    # only code_typed doesn't take a io argument
    args = method == :code_typed ? (:job,) : (:io, :job)

    @eval begin
        function $method(io::IO, @nospecialize(func), @nospecialize(types);
                         kernel::Bool=false, kwargs...)
            compiler_kwargs, kwargs = split_kwargs_runtime(kwargs, COMPILER_KWARGS)
            source = methodinstance(typeof(func), Base.to_tuple_type(types))
            config = compiler_config(device(); kernel, compiler_kwargs...)
            job = CompilerJob(source, config)
            GPUCompiler.$method($(args...); kwargs...)
        end
        $method(@nospecialize(func), @nospecialize(types); kwargs...) =
            $method(stdout, func, types; kwargs...)
end end const code_spirv = code_native # # @device_code_* functions # export @device_code_lowered, @device_code_typed, @device_code_warntype, @device_code_llvm, @device_code_spirv, @device_code # forward to GPUCompiler @eval $(Symbol("@device_code_lowered")) = $(getfield(GPUCompiler, Symbol("@device_code_lowered"))) @eval $(Symbol("@device_code_typed")) = $(getfield(GPUCompiler, Symbol("@device_code_typed"))) @eval $(Symbol("@device_code_warntype")) = $(getfield(GPUCompiler, Symbol("@device_code_warntype"))) @eval $(Symbol("@device_code_llvm")) = $(getfield(GPUCompiler, Symbol("@device_code_llvm"))) @eval $(Symbol("@device_code_spirv")) = $(getfield(GPUCompiler, Symbol("@device_code_native"))) @eval $(Symbol("@device_code")) = $(getfield(GPUCompiler, Symbol("@device_code"))) # # other # """ return_type(f, tt) -> r::Type Return a type `r` such that `f(args...)::r` where `args::tt`. """ function return_type(@nospecialize(func), @nospecialize(tt)) source = methodinstance(typeof(func), tt) config = compiler_config(device()) job = CompilerJob(source, config) interp = GPUCompiler.get_interpreter(job) sig = Base.signature_type(func, tt) return Core.Compiler._return_type(interp, sig) end ================================================ FILE: src/context.jl ================================================ # context management and global state # to avoid CUDA-style implicit state, where operations can fail if they are accidentally # executed in the wrong context, ownership should always be encoded in each object. # the functions below should only be used to determine initial ownership. # XXX: rework this -- it doesn't work well when altering the state export driver, driver!, device, device!, context, context!, global_queue, synchronize, is_integrated """ driver() -> ZeDriver Get the current Level Zero driver for the calling task. If no driver has been explicitly set with [`driver!`](@ref), returns the first available driver. 
The driver selection is task-local, allowing different Julia tasks to use different drivers. # Examples ```julia drv = driver() println("Using driver: ", drv) ``` See also: `driver!`, `drivers` """ function driver() get!(task_local_storage(), :ZeDriver) do first(drivers()) end end """ driver!(drv::ZeDriver) Set the current Level Zero driver for the calling task. This also clears the current device selection, as devices are associated with specific drivers. The driver selection is task-local, allowing different Julia tasks to use different drivers. # Arguments - `drv::ZeDriver`: The driver to use for subsequent operations. # Examples ```julia drv = drivers()[2] # Select second available driver driver!(drv) ``` See also: `driver`, `drivers` """ function driver!(drv::ZeDriver) task_local_storage(:ZeDriver, drv) delete!(task_local_storage(), :ZeDevice) end """ device() -> ZeDevice Get the current Level Zero device for the calling task. If no device has been explicitly set with [`device!`](@ref), returns the first available device for the current driver. The device selection is task-local, allowing different Julia tasks to use different devices. # Examples ```julia dev = device() println("Using device: ", dev) ``` See also: `device!`, `devices`, `driver` """ function device() get!(task_local_storage(), :ZeDevice) do first(devices(driver())) end end """ device!(dev::ZeDevice) device!(i::Int) Set the current Level Zero device for the calling task. The device selection is task-local, allowing different Julia tasks to use different devices. # Arguments - `dev::ZeDevice`: The device to use for subsequent operations. - `i::Int`: Device index (1-based) from the list of available devices for the current driver. 
# Examples ```julia # Select by device object dev = devices()[2] device!(dev) # Select by index device!(2) # Select second device ``` See also: [`device`](@ref), [`devices`](@ref) """ function device!(drv::ZeDevice) task_local_storage(:ZeDevice, drv) end function device!(i::Int) devs = devices(driver()) if i < 1 || i > length(devs) throw(ArgumentError("Invalid device index $i (must be between 1 and $(length(devs)))")) end return device!(devs[i]) end """ is_integrated(dev::ZeDevice=device()) -> Bool Check if the given device is an integrated GPU (i.e., integrated with the host processor). Integrated GPUs share memory with the CPU and are typically found in laptop and desktop processors with integrated graphics. # Arguments - `dev::ZeDevice`: The device to check. Defaults to the current device. # Returns - `true` if the device is integrated, `false` otherwise (e.g., discrete GPU). # Examples ```julia if is_integrated() println("Running on integrated graphics") else println("Running on discrete GPU") end # Check a specific device dev = devices()[1] is_integrated(dev) ``` See also: [`device`](@ref), [`devices`](@ref) """ function is_integrated(dev::ZeDevice=device()) props = oneL0.properties(dev) return (props.flags & oneL0.ZE_DEVICE_PROPERTY_FLAG_INTEGRATED) != 0 end const global_contexts = Dict{ZeDriver,ZeContext}() """ context() -> ZeContext Get the current Level Zero context for the calling task. If no context has been explicitly set with [`context!`](@ref), returns a global context for the current driver. Contexts manage the lifetime of resources like memory allocations and command queues. The context selection is task-local, but contexts themselves are cached globally per driver. 
# Examples ```julia ctx = context() println("Using context: ", ctx) ``` See also: [`context!`](@ref), [`driver`](@ref) """ function context() get!(task_local_storage(), :ZeContext) do get!(global_contexts, driver()) do ZeContext(driver()) end end end """ context!(ctx::ZeContext) Set the current Level Zero context for the calling task. The context selection is task-local, allowing different Julia tasks to use different contexts. # Arguments - `ctx::ZeContext`: The context to use for subsequent operations. # Examples ```julia ctx = ZeContext(driver()) context!(ctx) ``` See also: `context`, `ZeContext` """ function context!(ctx::ZeContext) task_local_storage(:ZeContext, ctx) end """ global_queue(ctx::ZeContext, dev::ZeDevice) -> ZeCommandQueue Get the global command queue for the given context and device. This queue is used as the default queue for executing operations, guaranteeing expected semantics when using a device on a Julia task. The queue is created with in-order execution flags, meaning commands are executed in the order they are submitted. Queues are cached per task and (context, device) pair. # Arguments - `ctx::ZeContext`: The context for the command queue. - `dev::ZeDevice`: The device for the command queue. # Returns - `ZeCommandQueue`: A cached command queue with in-order execution. # Examples ```julia ctx = context() dev = device() queue = global_queue(ctx, dev) ``` See also: `context`, `device`, `synchronize` """ function global_queue(ctx::ZeContext, dev::ZeDevice) # NOTE: dev purposefully does not default to context() or device() to stress that # objects should track ownership, and not rely on implicit global state. get!(task_local_storage(), (:ZeCommandQueue, ctx, dev)) do ZeCommandQueue(ctx, dev; flags = oneL0.ZE_COMMAND_QUEUE_FLAG_IN_ORDER) end end """ synchronize() Block the host thread until all operations on the global command queue for the current context and device have completed. 
This is useful for timing operations or ensuring that GPU work has finished before accessing results on the CPU. # Examples ```julia x = oneArray(rand(1000)) y = x .+ 1 synchronize() # Wait for GPU computation to complete println("GPU work completed") ``` See also: [`global_queue`](@ref), [`context`](@ref), [`device`](@ref) """ function oneL0.synchronize() oneL0.synchronize(global_queue(context(), device())) end # re-export and augment parts of oneL0 to make driver and device selection easier export drivers, devices """ devices() -> Vector{ZeDevice} devices(drv::ZeDriver) -> Vector{ZeDevice} Return a list of available Level Zero devices. Without arguments, returns devices for the current driver. With a driver argument, returns devices for that specific driver. # Examples ```julia # Get devices for current driver devs = devices() println("Found ", length(devs), " devices") # Get devices for specific driver drv = drivers()[1] devs = devices(drv) ``` See also: `device`, `device!`, `drivers` """ oneL0.devices() = devices(driver()) ## SYCL state # XXX: including objects in the TLS key is bad for performance export sycl_platform, sycl_device, sycl_context, sycl_queue function sycl_platform(drv=driver()) get!(task_local_storage(), (:SYCLPlatform, drv)) do syclPlatform(drv) end end function sycl_device(dev=device()) get!(task_local_storage(), (:SYCLDevice, dev)) do syclDevice(sycl_platform(), dev) end end function sycl_context(ctx=context(), dev=device()) get!(task_local_storage(), (:SYCLContext, dev)) do syclContext([sycl_device(dev)], ctx) end end function sycl_queue(queue) get!(task_local_storage(), (:SYCLQueue, queue.context, queue.device)) do syclQueue(sycl_context(queue.context, queue.device), sycl_device(queue.device), global_queue(queue.context, queue.device)) end end ================================================ FILE: src/device/array.jl ================================================ # Contiguous on-device arrays export oneDeviceArray, oneDeviceVector, 
oneDeviceMatrix, oneLocalArray ## construction # NOTE: we can't support the typical `tuple or series of integer` style construction, # because we're currently requiring a trailing pointer argument. """ oneDeviceArray{T,N,A} <: DenseArray{T,N} Device-side array type for use within GPU kernels. This type represents a view of GPU memory accessible within kernel code. Unlike [`oneArray`](@ref) which is used on the host, `oneDeviceArray` is designed for device-side operations and cannot be directly constructed on the host. # Type Parameters - `T`: Element type - `N`: Number of dimensions - `A`: Address space (typically `AS.CrossWorkgroup` for global memory) # Usage `oneDeviceArray` is typically not constructed directly. Instead, `oneArray` objects are automatically converted to `oneDeviceArray` when passed as kernel arguments. # Examples ```julia function kernel(a::oneDeviceArray{Float32,1}) i = get_global_id() @inbounds a[i] = a[i] * 2.0f0 return end a = oneArray(rand(Float32, 100)) @oneapi groups=1 items=100 kernel(a) # a is converted to oneDeviceArray ``` See also: [`oneArray`](@ref), [`oneLocalArray`](@ref), [`@oneapi`](@ref) """ struct oneDeviceArray{T,N,A} <: DenseArray{T,N} ptr::LLVMPtr{T,A} maxsize::Int dims::Dims{N} len::Int # inner constructors, fully parameterized, exact types (ie. 
Int not <:Integer) # TODO: deprecate; put `ptr` first like oneArray oneDeviceArray{T,N,A}(dims::Dims{N}, ptr::LLVMPtr{T,A}, maxsize::Int=prod(dims)*sizeof(T)) where {T,A,N} = new(ptr, maxsize, dims, prod(dims)) end const oneDeviceVector = oneDeviceArray{T,1,A} where {T,A} const oneDeviceMatrix = oneDeviceArray{T,2,A} where {T,A} # outer constructors, non-parameterized oneDeviceArray(dims::NTuple{N,<:Integer}, p::LLVMPtr{T,A}) where {T,A,N} = oneDeviceArray{T,N,A}(dims, p) oneDeviceArray(len::Integer, p::LLVMPtr{T,A}) where {T,A} = oneDeviceVector{T,A}((len,), p) # outer constructors, partially parameterized oneDeviceArray{T}(dims::NTuple{N,<:Integer}, p::LLVMPtr{T,A}) where {T,A,N} = oneDeviceArray{T,N,A}(dims, p) oneDeviceArray{T}(len::Integer, p::LLVMPtr{T,A}) where {T,A} = oneDeviceVector{T,A}((len,), p) oneDeviceArray{T,N}(dims::NTuple{N,<:Integer}, p::LLVMPtr{T,A}) where {T,A,N} = oneDeviceArray{T,N,A}(dims, p) oneDeviceVector{T}(len::Integer, p::LLVMPtr{T,A}) where {T,A} = oneDeviceVector{T,A}((len,), p) # outer constructors, fully parameterized oneDeviceArray{T,N,A}(dims::NTuple{N,<:Integer}, p::LLVMPtr{T,A}) where {T,A,N} = oneDeviceArray{T,N,A}(Int.(dims), p) oneDeviceVector{T,A}(len::Integer, p::LLVMPtr{T,A}) where {T,A} = oneDeviceVector{T,A}((Int(len),), p) ## array interface Base.elsize(::Type{<:oneDeviceArray{T}}) where {T} = sizeof(T) Base.size(g::oneDeviceArray) = g.dims Base.sizeof(x::oneDeviceArray) = Base.elsize(x) * length(x) # we store the array length too; computing prod(size) is expensive Base.size(g::oneDeviceArray{<:Any, 1}) = (g.len,) Base.length(g::oneDeviceArray) = g.len Base.pointer(x::oneDeviceArray{T,<:Any,A}) where {T,A} = Base.unsafe_convert(LLVMPtr{T,A}, x) @inline function Base.pointer(x::oneDeviceArray{T,<:Any,A}, i::Integer) where {T,A} Base.unsafe_convert(LLVMPtr{T,A}, x) + Base._memory_offset(x, i) end typetagdata(a::oneDeviceArray{<:Any,<:Any,A}, i=1) where {A} = reinterpret(LLVMPtr{UInt8,A}, a.ptr + a.maxsize) + i - one(i) 
## conversions

Base.unsafe_convert(::Type{LLVMPtr{T,A}}, x::oneDeviceArray{T,<:Any,A}) where {T,A} =
    x.ptr


## indexing intrinsics

# TODO: how are allocations aligned by the level zero API? keep track of this
#       because it enables optimizations like Load Store Vectorization
#       (cfr. shared memory and its wider-than-datatype alignment)

# compile-time alignment of element type T (union layouts use their computed alignment)
@generated function alignment(::oneDeviceArray{T}) where {T}
    if Base.isbitsunion(T)
        _, sz, al = Base.uniontype_layout(T)
        al
    else
        Base.datatype_alignment(T)
    end
end

@device_function @inline function arrayref(A::oneDeviceArray{T}, index::Integer) where {T}
    # simplified bounds check to avoid the OneTo construction, which calls `max`
    # and breaks elimination of redundant bounds checks in the generated code.
    #@boundscheck checkbounds(A, index)
    @boundscheck index <= length(A) || Base.throw_boundserror(A, index)
    if isbitstype(T)
        arrayref_bits(A, index)
    else #if isbitsunion(T)
        arrayref_union(A, index)
    end
end

@inline function arrayref_bits(A::oneDeviceArray{T}, index::Integer) where {T}
    align = alignment(A)
    unsafe_load(pointer(A), index, Val(align))
end

@inline @generated function arrayref_union(A::oneDeviceArray{T,<:Any,AS}, index::Integer) where {T,AS}
    typs = Base.uniontypes(T)

    # generate code that conditionally loads a value based on the selector value.
    # lacking noreturn, we return T to avoid inference thinking this can return Nothing.
    ex = :(Base.llvmcall("unreachable", $T, Tuple{}))
    for (sel, typ) in Iterators.reverse(enumerate(typs))
        ex = quote
            if selector == $(sel-1)
                ptr = reinterpret(LLVMPtr{$typ,AS}, data_ptr)
                unsafe_load(ptr, 1, Val(align))
            else
                $ex
            end
        end
    end

    quote
        selector_ptr = typetagdata(A, index)
        selector = unsafe_load(selector_ptr)
        align = alignment(A)
        data_ptr = pointer(A, index)
        return $ex
    end
end

@device_function @inline function arrayset(A::oneDeviceArray{T}, x::T, index::Integer) where {T}
    # simplified bounds check (see `arrayref`)
    #@boundscheck checkbounds(A, index)
    @boundscheck index <= length(A) || Base.throw_boundserror(A, index)
    if isbitstype(T)
        arrayset_bits(A, x, index)
    else #if isbitsunion(T)
        arrayset_union(A, x, index)
    end
    return A
end

@inline function arrayset_bits(A::oneDeviceArray{T}, x::T, index::Integer) where {T}
    align = alignment(A)
    unsafe_store!(pointer(A), x, index, Val(align))
end

@inline @generated function arrayset_union(A::oneDeviceArray{T,<:Any,AS}, x::T, index::Integer) where {T,AS}
    typs = Base.uniontypes(T)
    # `x` here is the *type* of the stored value; find its slot in the union
    sel = findfirst(isequal(x), typs)

    quote
        # write the selector byte, then the data at the matching layout
        selector_ptr = typetagdata(A, index)
        unsafe_store!(selector_ptr, $(UInt8(sel-1)))
        align = alignment(A)
        data_ptr = pointer(A, index)
        unsafe_store!(reinterpret(LLVMPtr{$x,AS}, data_ptr), x, 1, Val(align))
        return
    end
end

@device_function @inline function unsafe_cached_load(ptr::LLVMPtr{T, A}, i::Integer, align::Val) where {T, A}
    # For SPIR-V/Level Zero, we don't have explicit cache control intrinsics like CUDA's __ldg
    # So we fall back to a regular unsafe_load. The SPIR-V compiler may still apply
    # appropriate optimizations based on context.
    unsafe_load(ptr, i, align)
end

@device_function @inline function const_arrayref(A::oneDeviceArray{T}, index::Integer) where {T}
    # simplified bounds check (see `arrayset`)
    #@boundscheck checkbounds(A, index)
    @boundscheck index <= length(A) || Base.throw_boundserror(A, index)
    align = alignment(A)
    unsafe_cached_load(pointer(A), index, Val(align))
end


## indexing

Base.IndexStyle(::Type{<:oneDeviceArray}) = Base.IndexLinear()

Base.@propagate_inbounds Base.getindex(A::oneDeviceArray{T}, i1::Integer) where {T} =
    arrayref(A, i1)
Base.@propagate_inbounds Base.setindex!(A::oneDeviceArray{T}, x, i1::Integer) where {T} =
    arrayset(A, convert(T,x)::T, i1)

# preserve the specific integer type when indexing device arrays,
# to avoid extending 32-bit hardware indices to 64-bit.
Base.to_index(::oneDeviceArray, i::Integer) = i

# Base doesn't like Integer indices, so we need our own ND get and setindex! routines.
# See also: https://github.com/JuliaLang/julia/pull/42289
Base.@propagate_inbounds Base.getindex(A::oneDeviceArray,
                                       I::Union{Integer, CartesianIndex}...) =
    A[Base._to_linear_index(A, to_indices(A, I)...)]
Base.@propagate_inbounds Base.setindex!(A::oneDeviceArray, x,
                                        I::Union{Integer, CartesianIndex}...) =
    A[Base._to_linear_index(A, to_indices(A, I)...)] = x


## const indexing

"""
    Const(A::oneDeviceArray)

Mark a oneDeviceArray as constant/read-only. The invariant guaranteed is that you will not
modify an oneDeviceArray for the duration of the current kernel.

This API can only be used on devices with compute capability 3.5 or higher.

!!! warning
    Experimental API. Subject to change without deprecation.
""" struct Const{T,N,AS} <: DenseArray{T,N} a::oneDeviceArray{T,N,AS} end Base.Experimental.Const(A::oneDeviceArray) = Const(A) Base.IndexStyle(::Type{<:Const}) = IndexLinear() Base.size(C::Const) = size(C.a) Base.axes(C::Const) = axes(C.a) Base.@propagate_inbounds Base.getindex(A::Const, i1::Integer) = const_arrayref(A.a, i1) # deprecated Base.@propagate_inbounds ldg(A::oneDeviceArray, i1::Integer) = const_arrayref(A, i1) ## other Base.show(io::IO, a::oneDeviceVector) = print(io, "$(length(a))-element device array at $(pointer(a))") Base.show(io::IO, a::oneDeviceArray) = print(io, "$(join(a.shape, '×')) device array at $(pointer(a))") Base.show(io::IO, mime::MIME"text/plain", a::oneDeviceArray) = show(io, a) @inline function Base.iterate(A::oneDeviceArray, i=1) if (i % UInt) - 1 < length(A) (@inbounds A[i], i + 1) else nothing end end function Base.reinterpret(::Type{T}, a::oneDeviceArray{S,N,A}) where {T,S,N,A} err = _reinterpret_exception(T, a) err === nothing || throw(err) if sizeof(T) == sizeof(S) # fast case return oneDeviceArray{T,N,A}(size(a), reinterpret(LLVMPtr{T,A}, a.ptr), a.maxsize) end isize = size(a) size1 = div(isize[1]*sizeof(S), sizeof(T)) osize = tuple(size1, Base.tail(isize)...) return oneDeviceArray{T,N,A}(osize, reinterpret(LLVMPtr{T,A}, a.ptr), a.maxsize) end ## local memory export oneLocalArray """ oneLocalArray(::Type{T}, dims) Allocate local (workgroup-shared) memory within a GPU kernel. Local memory is shared among all work-items in a workgroup and provides faster access than global memory. It's useful for algorithms that require cooperation between work-items, such as reductions or matrix multiplication tiling. 
# Arguments - `T`: Element type - `dims`: Dimensions (must be compile-time constants) # Examples ```julia function matmul_kernel(A, B, C) # Allocate 16x16 tile in local memory tile_A = oneLocalArray(Float32, (16, 16)) tile_B = oneLocalArray(Float32, (16, 16)) # Load data into local memory local_i = get_local_id(0) local_j = get_local_id(1) tile_A[local_i, local_j] = A[...] tile_B[local_i, local_j] = B[...] barrier() # Synchronize workgroup # Compute using local memory # ... return end ``` !!! note The dimensions must be known at compile time. Local memory is limited (typically 64KB per workgroup), so large allocations may fail. See also: [`oneDeviceArray`](@ref), [`barrier`](@ref) """ @inline function oneLocalArray(::Type{T}, dims) where {T} len = prod(dims) # NOTE: this relies on const-prop to forward the literal length to the generator. # maybe we should include the size in the type, like StaticArrays does? ptr = emit_localmemory(T, Val(len)) oneDeviceArray(dims, ptr) end ================================================ FILE: src/device/atomics.jl ================================================ # Atomic operation device overrides and fallbacks # Fallback wrappers for Float32 atomic_inc!/atomic_dec! # Intel Level Zero doesn't support these directly for floating-point types, # so we implement them using atomic_add!/atomic_sub! 
# Increment/decrement a Float32 atomically by delegating to atomic add/sub with a
# constant operand of one.
@device_override @inline SPIRVIntrinsics.atomic_inc!(p::LLVMPtr{Float32, AS}) where {AS} =
    SPIRVIntrinsics.atomic_add!(p, 1.0f0)
@device_override @inline SPIRVIntrinsics.atomic_dec!(p::LLVMPtr{Float32, AS}) where {AS} =
    SPIRVIntrinsics.atomic_sub!(p, 1.0f0)

# Float64 fallbacks (if Float64 is supported on device)
@device_override @inline SPIRVIntrinsics.atomic_inc!(p::LLVMPtr{Float64, AS}) where {AS} =
    SPIRVIntrinsics.atomic_add!(p, 1.0)
@device_override @inline SPIRVIntrinsics.atomic_dec!(p::LLVMPtr{Float64, AS}) where {AS} =
    SPIRVIntrinsics.atomic_sub!(p, 1.0)

================================================
FILE: src/device/quirks.jl
================================================
# Emit a device-side error message and abort; `throw(nothing)` avoids constructing an
# exception object on the device.
macro print_and_throw(args...)
    return quote
        @println "ERROR: " $(args...) "."
        throw(nothing)
    end
end

# math.jl
@device_override @noinline function Base.Math.throw_complex_domainerror(f::Symbol, x)
    @print_and_throw "This operation requires a complex input to return a complex result"
end
@device_override @noinline function Base.Math.throw_exp_domainerror(x)
    @print_and_throw "Exponentiation yielding a complex result requires a complex argument"
end

# intfuncs.jl
@device_override @noinline function Base.throw_domerr_powbysq(::Any, p)
    @print_and_throw "Cannot raise an integer to a negative power"
end
@device_override @noinline function Base.throw_domerr_powbysq(::Integer, p)
    @print_and_throw "Cannot raise an integer to a negative power"
end
@device_override @noinline function Base.throw_domerr_powbysq(::AbstractMatrix, p)
    @print_and_throw "Cannot raise an integer to a negative power"
end

# checked.jl
@device_override @noinline function Base.Checked.throw_overflowerr_binaryop(op, x, y)
    @print_and_throw "Binary operation overflowed"
end
# NOTE(review): in Base, `Checked.throw_overflowerr_negation` takes a single argument;
# this three-argument method may never intercept the Base call — confirm against
# base/checked.jl for the supported Julia versions.
@device_override @noinline function Base.Checked.throw_overflowerr_negation(op, x, y)
    @print_and_throw "Negation overflowed"
end

# boot.jl
@device_override @noinline function Core.throw_inexacterror(f::Symbol, ::Type{T}, val) where {T}
    @print_and_throw "Inexact conversion"
end

# 
abstractarray.jl
@device_override @noinline Base.throw_boundserror(A, I) =
    @print_and_throw "Out-of-bounds array access"

# trig.jl
@device_override @noinline Base.Math.sincos_domain_error(x) =
    @print_and_throw "sincos(x) is only defined for finite x."

# diagonal.jl
# XXX: remove when we have malloc
import LinearAlgebra
# Device-friendly Diagonal setindex!: avoids the Base error path (which allocates an
# exception object) by printing and aborting instead.
@device_override function Base.setindex!(D::LinearAlgebra.Diagonal, v, i::Int, j::Int)
    @boundscheck checkbounds(D, i, j)
    if i == j
        @inbounds D.diag[i] = v
    elseif !iszero(v)
        @print_and_throw "cannot set off-diagonal entry to a nonzero value"
    end
    return v
end

# number.jl
# XXX: remove when we have malloc
@device_override @inline function Base.getindex(x::Number, I::Integer...)
    @boundscheck all(isone, I) || @print_and_throw "Out-of-bounds access of scalar value"
    x
end

# From Metal.jl to avoid widemul and Int128
@static if VERSION >= v"1.12.0-DEV.1736"
    # Partially reverts JuliaLang/julia PR #56750
    const BitInteger64 = Union{Int64, UInt64}
    @device_override function Base.checkbounds(::Type{Bool}, v::StepRange{<:BitInteger64, <:BitInteger64}, i::BitInteger64)
        @inline
        return checkindex(Bool, eachindex(IndexLinear(), v), i)
    end

    # Less accurate division for Float32 than Base Julia which relies on Float64
    # https://github.com/JuliaLang/julia/pull/49637
    @device_override Base.div(x::Float32, y::Float32) = trunc(x / y)
end

================================================
FILE: src/device/runtime.jl
================================================
# device runtime libraries

## Julia library

# reset the runtime cache from global scope, so that any change triggers recompilation
GPUCompiler.reset_runtime()

# Runtime hooks; bodies are intentionally empty for now (no device-side reporting yet),
# with the intended printf-style output kept as commented-out templates.
function signal_exception()
    return
end

function report_exception(ex)
    # @cuprintf("""
    #     ERROR: a %s was thrown during kernel execution.
    #     Run Julia on debug level 2 for device stack traces.
    #     """, ex)
    return
end

report_oom(sz) = return #@cuprintf("ERROR: Out of dynamic GPU memory (trying to allocate %i bytes)\n", sz)

function report_exception_name(ex)
    # @cuprintf("""
    #     ERROR: a %s was thrown during kernel execution.
    #     Stacktrace:
    #     """, ex)
    return
end

function report_exception_frame(idx, func, file, line)
    # @cuprintf("    [%i] %s at %s:%i\n", idx, func, file, line)
    return
end


## SPIRV libraries

# TODO

================================================
FILE: src/gpuarrays.jl
================================================
# GPUArrays.jl interface

# One cached RNG per device; state is sized to the device's maximum workgroup size.
const GLOBAL_RNGs = Dict{ZeDevice,GPUArrays.RNG}()
function GPUArrays.default_rng(::Type{<:oneArray})
    dev = device()
    get!(GLOBAL_RNGs, dev) do
        N = oneL0.compute_properties(dev).maxTotalGroupSize
        state = oneArray{NTuple{4, UInt32}}(undef, N)
        rng = GPUArrays.RNG(state)
        Random.seed!(rng)
        rng
    end
end

================================================
FILE: src/indexing.jl
================================================
Base.to_index(::oneArray, I::AbstractArray{Bool}) = findall(I)
if VERSION >= v"1.11.0-DEV.1157"
    Base.to_indices(x::oneArray, I::Tuple{AbstractArray{Bool}}) = (Base.to_index(x, I[1]),)
end

# Scatter kernel: for every true element, `indices[i]` (from the cumsum below) is its
# destination slot in the output vector of Cartesian indices.
function _ker!(ys, bools, indices)
    i = get_global_id()
    @inbounds if i ≤ length(bools) && bools[i]
        ii = CartesianIndices(bools)[i]
        b = indices[i]   # new position
        ys[b] = ii
    end
    return
end

# Two-pass findall: a cumsum over the flattened mask yields both the output length
# (last element) and each true element's destination position.
function Base.findall(bools::oneArray{Bool})
    I = keytype(bools)
    indices = cumsum(reshape(bools, prod(size(bools))))

    n = isempty(indices) ? 0 : @allowscalar indices[end]
    ys = oneArray{I}(undef, n)

    if n > 0
        kernel = @oneapi launch = false _ker!(ys, bools, indices)
        group_size = launch_configuration(kernel)
        kernel(ys, bools, indices; items = group_size, groups = cld(length(bools), group_size))
    end

    # unsafe_free!(indices)
    return ys
end

================================================
FILE: src/mapreduce.jl
================================================
## COV_EXCL_START

# TODO
# - serial version for lower latency
# - group-stride loop to delay need for second kernel launch

# Widen sub-word types to avoid shared memory corruption on Intel GPUs.
# Writing 1/2-byte values to local memory can clobber adjacent bytes.
# Only applies to integer/boolean types where `%` conversion is valid.
@inline _widen_type(::Type{Bool}) = Int32
@inline _widen_type(::Type{Int8}) = Int32
@inline _widen_type(::Type{UInt8}) = Int32
@inline _widen_type(::Type{Int16}) = Int32
@inline _widen_type(::Type{UInt16}) = Int32
@inline _widen_type(::Type{T}) where T = T

# Dispatch-based conversions so the compiler never generates `%` for non-integer types
@inline _to_wide(val, ::Type{W}) where W = val % W
@inline _to_wide(val::T, ::Type{T}) where T = val
@inline _from_wide(val, ::Type{T}) where T = val % T
@inline _from_wide(val::T, ::Type{T}) where T = val

# Reduce a value across a group, using local memory for communication
@inline function reduce_group(op, val::T, neutral, ::Val{maxitems}) where {T, maxitems}
    items = get_local_size()
    item = get_local_id()

    # use a wider type for shared memory to avoid sub-word corruption
    W = _widen_type(T)
    shared = oneLocalArray(W, (maxitems,))
    @inbounds shared[item] = _to_wide(val, W)

    # perform a tree reduction: each round combines pairs `d` apart, doubling `d`
    d = 1
    while d < items
        barrier(0)
        index = 2 * d * (item-1) + 1
        @inbounds if index <= items
            other_val = if index + d <= items
                _from_wide(shared[index+d], T)
            else
                neutral
            end
            shared[index] = _to_wide(op(_from_wide(shared[index], T), other_val), W)
        end
        d *= 2
    end

    # load the final value on the first item
    if item == 1
        val = @inbounds _from_wide(shared[item], T)
    end

    return val
end

Base.@propagate_inbounds _map_getindex(args::Tuple, I) = ((args[1][I]), _map_getindex(Base.tail(args), I)...)
Base.@propagate_inbounds _map_getindex(args::Tuple{Any}, I) = ((args[1][I]),)
Base.@propagate_inbounds _map_getindex(args::Tuple{}, I) = ()

# Reduce an array across the grid. All elements to be processed can be addressed by the
# product of the two iterators `Rreduce` and `Rother`, where the latter iterator will have
# singleton entries for the dimensions that should be reduced (and vice versa).
function partial_mapreduce_device(f, op, neutral, maxitems, Rreduce, Rother, R, As...)
    # decompose the 1D hardware indices into separate ones for reduction (across items
    # and possibly groups if it doesn't fit) and other elements (remaining groups)
    localIdx_reduce = get_local_id()
    localDim_reduce = get_local_size()
    groupIdx_reduce, groupIdx_other = fldmod1(get_group_id(), length(Rother))
    groupDim_reduce = get_num_groups() ÷ length(Rother)

    # group-based indexing into the values outside of the reduction dimension
    # (that means we can safely synchronize items within this group)
    iother = groupIdx_other
    @inbounds if iother <= length(Rother)
        Iother = Rother[iother]

        # load the neutral value
        Iout = CartesianIndex(Tuple(Iother)..., groupIdx_reduce)
        neutral = if neutral === nothing
            R[Iout]
        else
            neutral
        end

        val = op(neutral, neutral)

        # reduce serially across chunks of input vector that don't fit in a group
        ireduce = localIdx_reduce + (groupIdx_reduce - 1) * localDim_reduce
        while ireduce <= length(Rreduce)
            Ireduce = Rreduce[ireduce]
            # `max` merges the singleton dimensions of the two iterators into one index
            J = max(Iother, Ireduce)
            val = op(val, f(_map_getindex(As, J)...))
            ireduce += localDim_reduce * groupDim_reduce
        end

        val = reduce_group(op, val, neutral, maxitems)

        # write back to memory
        if localIdx_reduce == 1
            R[Iout] = val
        end
    end

    return
end

## COV_EXCL_STOP

function GPUArrays.mapreducedim!(f::F, op::OP, R::oneWrappedArray{T},
                                 A::Union{AbstractArray,Broadcast.Broadcasted};
                                 init=nothing) where {F, OP, T}
    Base.check_reducedims(R, A)
    length(A) == 0 && return R # isempty(::Broadcasted) iterates

    # add singleton dimensions to the output container, if needed
    if ndims(R) < ndims(A)
        dims = Base.fill_to_length(size(R), 1, Val(ndims(A)))
        R = reshape(R, dims)
    end

    # iteration domain, split in two: one part covers the dimensions that should
    # be reduced, and the other covers the rest. combining both covers all values.
    Rall = CartesianIndices(axes(A))
    Rother = CartesianIndices(axes(R))
    Rreduce = CartesianIndices(ifelse.(axes(A) .== axes(R), Ref(Base.OneTo(1)), axes(A)))
    # NOTE: we hard-code `OneTo` (`first.(axes(A))` would work too) or we get a
    #       CartesianIndices object with UnitRanges that behave badly on the GPU.
    @assert length(Rall) == length(Rother) * length(Rreduce)

    # allocate an additional, empty dimension to write the reduced value to.
    # this does not affect the actual location in memory of the final values,
    # but allows us to write a generalized kernel supporting partial reductions.
    R′ = reshape(R, (size(R)..., 1))

    # how many items do we want?
    #
    # items in a group work together to reduce values across the reduction dimensions;
    # we want as many as possible to improve algorithm efficiency and execution occupancy.
    wanted_items = length(Rreduce)
    function compute_items(max_items)
        if wanted_items > max_items
            max_items
        else
            wanted_items
        end
    end

    # how many items can we launch?
    #
    # we might not be able to launch all those items to reduce each slice in one go.
    # that's why each items also loops across their inputs, processing multiple values
    # so that we can span the entire reduction dimension using a single item group.

    # group size is restricted by local memory (use widened type for sub-word types)
    max_lmem_elements = compute_properties(device()).maxSharedLocalMemory ÷ sizeof(_widen_type(T))
    max_items = min(compute_properties(device()).maxTotalGroupSize,
                    compute_items(max_lmem_elements ÷ 2))
    # TODO: dynamic local memory to avoid two compilations

    # let the driver suggest a group size
    args = (f, op, init, Val(max_items), Rreduce, Rother, R′, A)
    kernel_args = kernel_convert.(args)
    kernel_tt = Tuple{Core.Typeof.(kernel_args)...}
    kernel = zefunction(partial_mapreduce_device, kernel_tt)
    reduce_items = compute_items(launch_configuration(kernel))

    # how many groups should we launch?
    #
    # even though we can always reduce each slice in a single item group, that may not be
    # optimal as it might not saturate the GPU. we already launch some groups to process
    # independent dimensions in parallel; pad that number to ensure full occupancy.
    other_groups = length(Rother)
    reduce_groups = cld(length(Rreduce), reduce_items)

    # determine the launch configuration
    items = reduce_items
    groups = reduce_groups*other_groups

    # perform the actual reduction
    if reduce_groups == 1
        # we can cover the dimensions to reduce using a single group
        @oneapi items groups partial_mapreduce_device(
            f, op, init, Val(items), Rreduce, Rother, R′, A)
    else
        # we need multiple steps to cover all values to reduce
        partial = similar(R, (size(R)..., reduce_groups))
        if init === nothing
            # without an explicit initializer we need to copy from the output container
            partial .= R
        end
        @oneapi items groups partial_mapreduce_device(
            f, op, init, Val(items), Rreduce, Rother, partial, A)

        # second step: reduce the per-group partial results into the output container
        GPUArrays.mapreducedim!(identity, op, R′, partial; init=init)
    end

    return R
end

================================================
FILE: src/memory.jl
================================================
# memory operations

"""
    Base.unsafe_copyto!(ctx::ZeContext, dev::ZeDevice, dst, src, N)

Low-level memory copy operation on the GPU.

Copies `N` elements of type `T` from `src` to `dst` using the specified context and
device. Both `src` and `dst` can be either host pointers (`Ptr`) or device pointers
(`ZePtr`).

# Arguments
- `ctx::ZeContext`: Level Zero context
- `dev::ZeDevice`: Level Zero device
- `dst::Union{Ptr{T},ZePtr{T}}`: Destination pointer
- `src::Union{Ptr{T},ZePtr{T}}`: Source pointer
- `N::Integer`: Number of elements to copy

!!! warning
    This is a low-level function. No bounds checking is performed.
    For safe array copying, use `copyto!` on `oneArray` objects instead.

See also: [`copyto!`](@ref), [`oneArray`](@ref)
"""
function Base.unsafe_copyto!(ctx::ZeContext, dev::ZeDevice, dst::Union{Ptr{T},ZePtr{T}},
                             src::Union{Ptr{T},ZePtr{T}}, N::Integer) where T
    bytes = N*sizeof(T)
    # zero-byte copies are a no-op; avoids submitting an empty command list
    bytes==0 && return
    execute!(global_queue(ctx, dev)) do list
        append_copy!(list, dst, src, bytes)
    end
end

"""
    unsafe_fill!(ctx::ZeContext, dev::ZeDevice, ptr, pattern, N)

Low-level memory fill operation on the GPU.

Fills `N` elements at `ptr` with the given pattern using the specified context and
device.

# Arguments
- `ctx::ZeContext`: Level Zero context
- `dev::ZeDevice`: Level Zero device
- `ptr::Union{Ptr{T},ZePtr{T}}`: Pointer to memory to fill
- `pattern::Union{Ptr{T},ZePtr{T}}`: Pointer to pattern value
- `N::Integer`: Number of elements to fill

!!! warning
    This is a low-level function. For safe array operations, use `fill!` on
    `oneArray` objects instead.

See also: [`fill!`](@ref), [`oneArray`](@ref)
"""
function unsafe_fill!(ctx::ZeContext, dev::ZeDevice, ptr::Union{Ptr{T},ZePtr{T}},
                      pattern::Union{Ptr{T},ZePtr{T}}, N::Integer) where T
    bytes = N*sizeof(T)
    bytes==0 && return
    execute!(global_queue(ctx, dev)) do list
        # pattern size is sizeof(T); the fill repeats it across `bytes` bytes
        append_fill!(list, ptr, pattern, sizeof(T), bytes)
    end
end

================================================
FILE: src/oneAPI.jl
================================================
module oneAPI

using GPUArrays
using Adapt
using GPUCompiler
import ExprTools
using SpecialFunctions
import Preferences
import KernelAbstractions: KernelAbstractions

using LLVM
using LLVM.Interop
using Core: LLVMPtr

using SPIRV_LLVM_Translator_jll, SPIRV_Tools_jll
using oneAPI_Support_jll

export oneL0

# core library
include("../lib/utils/APIUtils.jl")
include("../lib/level-zero/oneL0.jl")
using .oneL0

functional() = oneL0.functional[]

# device functionality
import SPIRVIntrinsics
SPIRVIntrinsics.@import_all
SPIRVIntrinsics.@reexport_public
Base.Experimental.@MethodTable(method_table)
include("device/runtime.jl")
include("device/array.jl")
include("device/quirks.jl")
include("device/atomics.jl")

# essential stuff
include("context.jl")

# array abstraction
include("memory.jl")
include("pool.jl")
include("array.jl")

# compiler implementation
include("compiler/compilation.jl")
include("compiler/execution.jl")
include("compiler/reflection.jl")

if Sys.islinux()
    # library interop
    include("../lib/support/Support.jl")
    include("../lib/sycl/SYCL.jl")
    using .SYCL
    export SYCL

    # array libraries
    include("../lib/mkl/oneMKL.jl")
    export oneMKL
end

# integrations and specialized functionality
include("broadcast.jl")
include("mapreduce.jl")
include("gpuarrays.jl")
include("random.jl")
include("utils.jl")
include("oneAPIKernels.jl")
import .oneAPIKernels: oneAPIBackend
include("accumulate.jl")
include("sorting.jl")
include("indexing.jl")
export oneAPIBackend

function __init__()
    # skip runtime initialization while generating the precompile image
    precompiling = ccall(:jl_generating_output, Cint, ()) != 0
    precompiling && return

    if oneL0.NEO_jll.is_available() && oneL0.functional[]
        if Sys.iswindows()
            @warn """oneAPI.jl support for native Windows is experimental and incomplete.
                     For the time being, it is recommended to use WSL or Linux instead."""
        else
            # ensure that the OpenCL loader finds the ICD files from our artifacts
            ENV["OCL_ICD_FILENAMES"] = oneL0.NEO_jll.libigdrcl
        end

        # XXX: work around an issue with SYCL/Level Zero interoperability
        # (see JuliaGPU/oneAPI.jl#417)
        ENV["SYCL_PI_LEVEL_ZERO_BATCH_SIZE"] = "1"
    end

    return nothing
end

# Toggle debug builds of the compiler JLLs via preferences; takes effect on restart.
function set_debug!(debug::Bool)
    for jll in [oneL0.NEO_jll, oneL0.NEO_jll.libigc_jll]
        Preferences.set_preferences!(jll, "debug" => string(debug); force=true)
    end
    @info "oneAPI debug mode $(debug ? "enabled" : "disabled"); please re-start Julia."
end

end

================================================
FILE: src/oneAPIKernels.jl
================================================
module oneAPIKernels

using ..oneAPI
using ..oneAPI: @device_override, SPIRVIntrinsics, method_table
import KernelAbstractions as KA
import StaticArrays
import Adapt


## Back-end Definition

export oneAPIBackend

struct oneAPIBackend <: KA.GPU
    prefer_blocks::Bool    # prefer more workgroups over larger workgroups when autotuning
    always_inline::Bool    # forwarded to the compiler when launching kernels
end
oneAPIBackend(; prefer_blocks = false, always_inline = false) =
    oneAPIBackend(prefer_blocks, always_inline)

# `unified=true` allocates shared (host-visible) buffers instead of device-only ones.
@inline KA.allocate(::oneAPIBackend, ::Type{T}, dims::Tuple; unified::Bool = false) where {T} =
    oneArray{T, length(dims), unified ? oneAPI.oneL0.SharedBuffer : oneAPI.oneL0.DeviceBuffer}(undef, dims)
@inline KA.zeros(::oneAPIBackend, ::Type{T}, dims::Tuple; unified::Bool = false) where {T} =
    fill!(oneArray{T, length(dims), unified ? oneAPI.oneL0.SharedBuffer : oneAPI.oneL0.DeviceBuffer}(undef, dims), zero(T))
@inline KA.ones(::oneAPIBackend, ::Type{T}, dims::Tuple; unified::Bool = false) where {T} =
    fill!(oneArray{T, length(dims), unified ? oneAPI.oneL0.SharedBuffer : oneAPI.oneL0.DeviceBuffer}(undef, dims), one(T))

KA.get_backend(::oneArray) = oneAPIBackend()
# TODO should be non-blocking
KA.synchronize(::oneAPIBackend) = oneAPI.oneL0.synchronize()
KA.supports_float64(::oneAPIBackend) = false # TODO: Check if this is device dependent
KA.supports_unified(::oneAPIBackend) = true
KA.functional(::oneAPIBackend) = oneAPI.functional()

Adapt.adapt_storage(::oneAPIBackend, a::AbstractArray) = Adapt.adapt(oneArray, a)
Adapt.adapt_storage(::oneAPIBackend, a::oneArray) = a
Adapt.adapt_storage(::KA.CPU, a::oneArray) = convert(Array, a)


## Memory Operations

function KA.copyto!(::oneAPIBackend, A, B)
    copyto!(A, B)
    # TODO: Address device to host copies in jl being synchronizing
end


## Device Operations

function KA.ndevices(::oneAPIBackend)
    return length(oneAPI.devices())
end

# Report the current device as a 1-based index into the device list.
function KA.device(::oneAPIBackend)::Int
    dev = oneAPI.device()
    devs = oneAPI.devices()
    idx = findfirst(==(dev), devs)
    return idx === nothing ? 1 : idx
end

function KA.device!(backend::oneAPIBackend, id::Int)
    return oneAPI.device!(id)
end


## Kernel Launch

function KA.mkcontext(kernel::KA.Kernel{oneAPIBackend}, _ndrange, iterspace)
    KA.CompilerMetadata{KA.ndrange(kernel), KA.DynamicCheck}(_ndrange, iterspace)
end
function KA.mkcontext(kernel::KA.Kernel{oneAPIBackend}, I, _ndrange, iterspace, ::Dynamic) where Dynamic
    KA.CompilerMetadata{KA.ndrange(kernel), Dynamic}(I, _ndrange, iterspace)
end

function KA.launch_config(kernel::KA.Kernel{oneAPIBackend}, ndrange, workgroupsize)
    if ndrange isa Integer
        ndrange = (ndrange,)
    end
    if workgroupsize isa Integer
        workgroupsize = (workgroupsize, )
    end

    # partition checked that the ndrange's agreed
    if KA.ndrange(kernel) <: KA.StaticSize
        ndrange = nothing
    end

    iterspace, dynamic = if KA.workgroupsize(kernel) <: KA.DynamicSize &&
                            workgroupsize === nothing
        # use ndrange as preliminary workgroupsize for autotuning
        KA.partition(kernel, ndrange, ndrange)
    else
        KA.partition(kernel, ndrange, workgroupsize)
    end

    return ndrange, workgroupsize, iterspace, dynamic
end

# Split a flat thread budget over the dimensions of `ndrange`, greedily filling
# earlier dimensions first.
function threads_to_workgroupsize(threads, ndrange)
    total = 1
    return map(ndrange) do n
        x = min(div(threads, total), n)
        total *= x
        return x
    end
end

function (obj::KA.Kernel{oneAPIBackend})(args...; ndrange=nothing, workgroupsize=nothing)
    backend = KA.backend(obj)
    ndrange, workgroupsize, iterspace, dynamic =
        KA.launch_config(obj, ndrange, workgroupsize)

    # this might not be the final context, since we may tune the workgroupsize
    ctx = KA.mkcontext(obj, ndrange, iterspace)

    # If the kernel is statically sized we can tell the compiler about that
    if KA.workgroupsize(obj) <: KA.StaticSize
        # TODO: maxthreads
        # maxthreads = prod(KA.get(KA.workgroupsize(obj)))
    else
        # maxthreads = nothing
    end

    kernel = @oneapi launch = false always_inline = backend.always_inline obj.f(ctx, args...)

    # figure out the optimal workgroupsize automatically
    if KA.workgroupsize(obj) <: KA.DynamicSize && workgroupsize === nothing
        items = oneAPI.launch_configuration(kernel)
        if backend.prefer_blocks
            # Prefer blocks over threads:
            # Reducing the workgroup size (items) increases the number of workgroups (blocks).
            # We use a simple heuristic here since we lack full occupancy info (max_blocks)
            # from launch_configuration. If the total range is large enough, full workgroups
            # are fine. If the range is small, we might want to reduce 'items' to create
            # more blocks to fill the GPU.
            # (Simplified logic compared to CUDA.jl which uses explicit occupancy calculators)
            total_items = prod(ndrange)
            if total_items < items * 16 # Heuristic factor
                # Force at least a few blocks if possible by reducing items per block
                target_blocks = 16 # Target at least 16 blocks
                items = max(1, min(items, cld(total_items, target_blocks)))
            end
        end
        workgroupsize = threads_to_workgroupsize(items, ndrange)
        iterspace, dynamic = KA.partition(obj, ndrange, workgroupsize)
        ctx = KA.mkcontext(obj, ndrange, iterspace)
    end

    groups = length(KA.blocks(iterspace))
    items = length(KA.workitems(iterspace))

    # nothing to launch for an empty iteration space
    if groups == 0
        return nothing
    end

    # Launch kernel
    kernel(ctx, args...; items, groups)

    return nothing
end


## Indexing Functions

@device_override @inline function KA.__index_Local_Linear(ctx)
    return get_local_id()
end

@device_override @inline function KA.__index_Group_Linear(ctx)
    return get_group_id()
end

@device_override @inline function KA.__index_Global_Linear(ctx)
    return get_global_id()
end

@device_override @inline function KA.__index_Local_Cartesian(ctx)
    @inbounds KA.workitems(KA.__iterspace(ctx))[get_local_id()]
end

@device_override @inline function KA.__index_Group_Cartesian(ctx)
    @inbounds KA.blocks(KA.__iterspace(ctx))[get_group_id()]
end

@device_override @inline function KA.__index_Global_Cartesian(ctx)
    return @inbounds KA.expand(KA.__iterspace(ctx), get_group_id(), get_local_id())
end

@device_override @inline function KA.__validindex(ctx)
    if KA.__dynamic_checkbounds(ctx)
        I = @inbounds KA.expand(KA.__iterspace(ctx), get_group_id(), get_local_id())
        return I in KA.__ndrange(ctx)
    else
        return true
    end
end


## Shared and Scratch Memory

@device_override @inline function KA.SharedMemory(::Type{T}, ::Val{Dims}, ::Val{Id}) where {T, Dims, Id}
    ptr = oneAPI.emit_localmemory(T, Val(prod(Dims)))
    oneDeviceArray(Dims, ptr)
end

@device_override @inline function KA.Scratchpad(ctx, ::Type{T}, ::Val{Dims}) where {T, Dims}
    StaticArrays.MArray{KA.__size(Dims), T}(undef)
end


## Synchronization and
Printing

@device_override @inline function KA.__synchronize()
    barrier(0)
end

@device_override @inline function KA.__print(args...)
    oneAPI._print(args...)
end


## Other

# `Const` arguments are wrapped in the read-only device array wrapper.
Adapt.adapt_storage(to::KA.ConstAdaptor, a::oneDeviceArray) = Base.Experimental.Const(a)

KA.argconvert(::KA.Kernel{oneAPIBackend}, arg) = kernel_convert(arg)

# Change the execution priority of subsequent kernel launches by replacing the cached
# task-local command queue with one created using the requested priority flag.
function KA.priority!(::oneAPIBackend, prio::Symbol)
    if !(prio in (:high, :normal, :low))
        error("priority must be one of :high, :normal, :low")
    end

    priority_enum = if prio == :high
        oneAPI.oneL0.ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_HIGH
    elseif prio == :low
        oneAPI.oneL0.ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW
    else
        oneAPI.oneL0.ZE_COMMAND_QUEUE_PRIORITY_NORMAL
    end

    ctx = oneAPI.context()
    dev = oneAPI.device()

    # Update the cached queue
    # We synchronize the current queue first to ensure safety
    current_queue = oneAPI.global_queue(ctx, dev)
    oneAPI.oneL0.synchronize(current_queue)

    # Replace the queue in task_local_storage
    # The key used by global_queue is (:ZeCommandQueue, ctx, dev)
    new_queue = oneAPI.oneL0.ZeCommandQueue(ctx, dev;
                                            flags = oneAPI.oneL0.ZE_COMMAND_QUEUE_FLAG_IN_ORDER,
                                            priority = priority_enum)
    task_local_storage((:ZeCommandQueue, ctx, dev), new_queue)

    return nothing
end

end

================================================
FILE: src/pool.jl
================================================
# Track total allocated GPU memory (device + shared buffers) for proactive GC.
# This mirrors AMDGPU.jl's approach: trigger GC before OOM so that finalizers
# can free stale GPU buffers that Julia's GC hasn't collected yet (Julia's GC
# only sees CPU memory pressure, not GPU memory pressure).
const _allocated_bytes = Threads.Atomic{Int64}(0)
const _total_mem_cache = Threads.Atomic{Int64}(0)

# Query (and cache) the device's total memory; the CAS makes concurrent first calls safe.
function _get_total_mem(dev)
    cached = _total_mem_cache[]
    cached > 0 && return cached
    total = only(oneL0.memory_properties(dev)).totalSize
    Threads.atomic_cas!(_total_mem_cache, Int64(0), Int64(total))
    return _total_mem_cache[]
end

# Trigger a GC pass before an allocation that would push usage past 40% (incremental)
# or 80% (full) of total device memory.
function _maybe_gc(dev, bytes)
    allocated = _allocated_bytes[]
    allocated <= 0 && return
    total_mem = _get_total_mem(dev)
    return if allocated + bytes > total_mem * 0.8
        # Flush deferred resource releases (e.g., MKL sparse handles) from previous GC
        # cycles first — these are safe to release now because they were deferred earlier.
        # Do this BEFORE GC to avoid racing with new finalizers.
        oneL0._run_reclaim_callbacks()
        # Full GC to collect old-generation objects whose finalizers free GPU memory.
        GC.gc(true)
    elseif allocated + bytes > total_mem * 0.4
        GC.gc(false)
    end
end

function allocate(::Type{oneL0.DeviceBuffer}, ctx, dev, bytes::Int, alignment::Int)
    bytes == 0 && return oneL0.DeviceBuffer(ZE_NULL, bytes, ctx, dev)
    _maybe_gc(dev, bytes)
    buf = device_alloc(ctx, dev, bytes, alignment)
    make_resident(ctx, dev, buf)
    Threads.atomic_add!(_allocated_bytes, Int64(bytes))
    return buf
end

function allocate(::Type{oneL0.SharedBuffer}, ctx, dev, bytes::Int, alignment::Int)
    bytes == 0 && return oneL0.SharedBuffer(ZE_NULL, bytes, ctx, dev)
    # TODO: support cross-device shared buffers (by setting `dev=nothing`)
    _maybe_gc(dev, bytes)
    buf = shared_alloc(ctx, dev, bytes, alignment)
    make_resident(ctx, dev, buf)
    Threads.atomic_add!(_allocated_bytes, Int64(bytes))
    return buf
end

# Host buffers are not tracked against device memory, so no GC heuristics apply.
function allocate(::Type{oneL0.HostBuffer}, ctx, dev, bytes::Int, alignment::Int)
    bytes == 0 && return oneL0.HostBuffer(ZE_NULL, bytes, ctx)
    host_alloc(ctx, bytes, alignment)
end

function release(buf::oneL0.AbstractBuffer)
    sizeof(buf) == 0 && return
    if buf isa oneL0.DeviceBuffer || buf isa oneL0.SharedBuffer
        Threads.atomic_sub!(_allocated_bytes, Int64(sizeof(buf)))
    end

    # XXX: is it necessary to evict device memory if we are going to free it?
    #      this is racy, because eviction is not queue-ordered, and
    #      we don't want to synchronize inside what could have been a
    #      GC-driven finalizer. if we need to, port the stream/queue
    #      tracking from CUDA.jl so that we can synchronize only the
    #      queue that's associated with the buffer.
    #if buf isa oneL0.DeviceBuffer || buf isa oneL0.SharedBuffer
    #    ctx = oneL0.context(buf)
    #    dev = oneL0.device(buf)
    #    evict(ctx, dev, buf)
    #end

    free(buf; policy=oneL0.ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_BLOCKING_FREE)

    # TODO: queue-ordered free from non-finalizer tasks once we have
    #       `zeMemFreeAsync(ptr, queue)`

    return
end

================================================
FILE: src/random.jl
================================================
using Random

gpuarrays_rng() = GPUArrays.default_rng(oneArray)

# GPUArrays in-place
Random.rand!(A::oneWrappedArray) = Random.rand!(gpuarrays_rng(), A)
Random.randn!(A::oneWrappedArray) = Random.randn!(gpuarrays_rng(), A)

# GPUArrays out-of-place
# NOTE: these are module-local functions (no `Base.` qualifier), i.e. `oneAPI.rand`;
# they do not extend `Base.rand`.
rand(T::Type, dims::Dims) = Random.rand!(oneArray{T}(undef, dims...))
randn(T::Type, dims::Dims; kwargs...) = Random.randn!(oneArray{T}(undef, dims...); kwargs...)

# support all dimension specifications
rand(T::Type, dim1::Integer, dims::Integer...) =
    Random.rand!(oneArray{T}(undef, dim1, dims...))
randn(T::Type, dim1::Integer, dims::Integer...; kwargs...) =
    Random.randn!(oneArray{T}(undef, dim1, dims...); kwargs...)

# untyped out-of-place, defaulting to Float32
rand(dim1::Integer, dims::Integer...) =
    Random.rand!(oneArray{Float32}(undef, dim1, dims...))
randn(dim1::Integer, dims::Integer...; kwargs...) =
    Random.randn!(oneArray{Float32}(undef, dim1, dims...); kwargs...)

# seeding
seed!(seed=Base.rand(UInt64)) = Random.seed!(gpuarrays_rng(), seed)

================================================
FILE: src/sorting.jl
================================================
# Sorting is delegated to `AK` (presumably AcceleratedKernels; alias defined elsewhere
# in the package — TODO confirm); wrappers re-return the mutated array per Base convention.
Base.sort!(x::oneArray; kwargs...) = (AK.sort!(x; kwargs...); return x)
Base.sortperm!(ix::oneArray, x::oneArray; kwargs...) = (AK.sortperm!(ix, x; kwargs...); return ix)
Base.sortperm(x::oneArray; kwargs...) = sortperm!(oneArray(1:length(x)), x; kwargs...)

================================================
FILE: src/utils.jl
================================================
# Print a human-readable summary of binary dependencies, toolchain versions, package
# versions, relevant environment variables, and detected drivers/devices.
function versioninfo(io::IO=stdout)
    if Sys.islinux()
        println(io, "Binary dependencies:")
        for jll in [oneL0.NEO_jll, oneL0.NEO_jll.libigc_jll, oneL0.NEO_jll.gmmlib_jll,
                    SPIRV_LLVM_Translator_jll, SPIRV_Tools_jll, oneAPI_Support_jll]
            name = string(jll)
            # strip the "_jll" suffix from the module name
            print(io, "- $(name[1:end-4]): $(Base.pkgversion(jll))")
            if jll.host_platform !== nothing
                debug = tryparse(Bool, get(jll.host_platform.tags, "debug", "false"))
                if debug === true
                    print(io, " (debug)")
                end
            end
            if jll === oneAPI_Support_jll
                ver = oneAPI.oneMKL.version()
                print(io, " (oneMKL v$ver)")
            end
            println(io)
        end
        println(io)
    end

    println(io, "Toolchain:")
    println(io, "- Julia: $VERSION")
    println(io, "- LLVM: $(LLVM.version())")
    println(io)

    println(io, "Julia packages:")
    println(io, "- oneAPI.jl: $(Base.pkgversion(oneAPI))")
    for name in [:GPUArrays, :GPUCompiler, :KernelAbstractions, :LLVM, :SPIRVIntrinsics]
        mod = getfield(oneAPI, name)
        println(io, "- $(name): $(Base.pkgversion(mod))")
    end
    println(io)

    env = filter(var->startswith(var, "JULIA_ONEAPI"), keys(ENV))
    if !isempty(env)
        println(io, "Environment:")
        for var in env
            println(io, "- $var: $(ENV[var])")
        end
        println(io)
    end

    drvs = drivers()
    if isempty(drvs)
        println(io, "No oneAPI-capable drivers.")
    elseif length(drvs) == 1
        println(io, "1 driver:")
    else
        println(io, length(drvs), " drivers:")
    end
    for drv in drivers()
        props = properties(drv)
        println(io, "- $(props.uuid) (v$(props.driverVersion), API v$(api_version(drv)))")
    end
    println(io)

    devs = [dev for drv in drivers() for dev in devices(drv)]
    if isempty(devs)
        println(io, "No oneAPI-capable devices.")
    elseif length(devs) == 1
        println(io, "1 device:")
    else
        println(io, length(devs), " devices:")
    end
    for dev in devs
        props = properties(dev)
        println(io, "- $(props.name)")
    end
end

"""
    @sync ex

Run expression `ex` and synchronize the GPU afterwards.

See also: `synchronize`.
"""
macro sync(ex)
    quote
        local ret = $(esc(ex))
        synchronize()
        ret
    end
end

================================================
FILE: test/Project.toml
================================================
[deps]
AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
NEO_jll = "700fe977-ac61-5f37-bbc8-c6c4b2b6a9fd"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
libigc_jll = "94295238-5935-5bd7-bb0f-b00942e9bdd5"
oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"
oneAPI_Support_jll = "b049733a-a71d-5ed3-8eba-7d323ac00b36"

================================================
FILE: test/array.jl
================================================
using LinearAlgebra
import Adapt

@testset "constructors" begin
    xs = oneArray{Int}(undef, 2, 3)
    @test collect(oneArray([1 2; 3 4])) == [1 2; 3 4]
    @test testf(vec, rand(Float32, 5,3))
    @test Base.elsize(xs) == sizeof(Int)
    @test oneArray{Int, 2}(xs) === xs
    @test_throws ArgumentError Base.unsafe_convert(Ptr{Int}, xs)
    @test_throws ArgumentError Base.unsafe_convert(Ptr{Float32}, xs)
    @test collect(oneAPI.zeros(Float32, 2, 2)) == zeros(Float32, 2, 2)
    @test collect(oneAPI.ones(Float32, 2, 2)) == ones(Float32, 2, 2)
    @test collect(oneAPI.fill(0, 2, 2)) == zeros(Int, 2, 2)
    @test collect(oneAPI.fill(1, 2, 2)) == ones(Int, 2, 2)
end

@testset "adapt" begin
    A = rand(Float32, 3, 3)
    dA = oneArray(A)
    @test Adapt.adapt(Array, dA) == A
    @test Adapt.adapt(oneArray, A) isa oneArray
    @test Array(Adapt.adapt(oneArray, A)) == A
end

@testset "reshape" begin
    A = [1 2 3 4 5 6 7 8]
    gA = reshape(oneArray(A),1,8)
    _A = reshape(A,1,8)
    _gA = Array(gA)
    @test all(_A .== _gA)
    A = [1,2,3,4]
    gA = reshape(oneArray(A),4)
end

@testset "fill(::SubArray)" begin
    xs = oneAPI.zeros(Float32, 3)
    fill!(view(xs, 2:2), 1)
    @test Array(xs) == [0,1,0]
end

@testset "reinterpret of view with non-aligned offset" begin
    # reinterpreting a view to a larger element type where the byte offset
    # is not a multiple of the new element size
    a = oneArray(Int32[1,2,3,4,5,6,7,8,9])
    v = view(a, 2:7)             # offset of 1 Int32 = 4 bytes
    r = reinterpret(Int64, v)    # Int64 = 8 bytes; 4 is not a multiple of 8
    @test Array(r) == reinterpret(Int64, @view Array(a)[2:7])
end

@testset "shared buffers & unsafe_wrap" begin
    a = oneVector{Int,oneL0.SharedBuffer}(undef, 2)

    # check that basic operations work on arrays backed by shared memory
    fill!(a, 40)
    a .+= 2
    @test Array(a) == [42, 42]

    # derive an Array object and test that the memory keeps in sync
    b = unsafe_wrap(Array, a)
    b[1] = 100
    @test Array(a) == [100, 42]
    oneAPI.@sync copyto!(a, 2, [200], 1, 1)
    @test b == [100, 200]
end

# https://github.com/JuliaGPU/CUDA.jl/issues/2191
@testset "preserving buffer types" begin
    a = oneVector{Int,oneL0.SharedBuffer}([1])
    @test oneAPI.buftype(a) == oneL0.SharedBuffer

    # unified-ness should be preserved
    b = a .+ 1
    @test oneAPI.buftype(b) == oneL0.SharedBuffer

    # when there's a conflict, we should defer to unified memory
    c = oneVector{Int,oneL0.HostBuffer}([1])
    d = oneVector{Int,oneL0.DeviceBuffer}([1])
    e = c .+ d
    @test oneAPI.buftype(e) == oneL0.SharedBuffer
end

@testset "resizing" begin
    a = oneArray([1,2,3])

    resize!(a, 3)
    @test length(a) == 3
    @test Array(a) == [1,2,3]

    resize!(a, 5)
    @test length(a) == 5
    @test Array(a)[1:3] == [1,2,3]

    resize!(a, 2)
    @test length(a) == 2
    @test Array(a)[1:2] == [1,2]

    b = oneArray{Int}(undef, 0)
    @test length(b) == 0
    resize!(b, 1)
    @test length(b) == 1
end

================================================
FILE: test/device/intrinsics.jl
================================================
@testset "work items" begin
    @on_device get_work_dim() |> sink

    @on_device get_global_size() |> sink
    @on_device get_global_id() |> sink

    @on_device get_local_size() |> sink
    @on_device get_enqueued_local_size() |> sink
    @on_device get_local_id() |> sink

    @on_device get_num_groups() |> sink
    @on_device get_group_id() |> sink

    @on_device get_global_offset() |> sink

    @on_device get_global_linear_id() |> sink
    @on_device get_local_linear_id() |> sink
end

############################################################################################

@testset "math" begin
    @testset "log10" begin
        @test testf(a->log10.(a), Float32[100])
    end

    for op in (exp, exp2, exp10, expm1)
        @testset "$op" begin
            typs = [Float32]
            float64_supported && push!(typs, Float64)
            for T in typs
                @test testf(x->op.(x), rand(T, 1))
                @test testf(x->op.(x), -rand(T, 1))
            end
        end
    end

    @testset "exp" begin
        @test testf(a->exp.(a), Matrix{ComplexF32}([1.0 + 1.0im 1.0 - 1.0im; -1.0 + 1.0im -1.0 - 1.0im]))
    end
end

############################################################################################

endline = Sys.iswindows() ?
"\r\n" : "\n" @testset "formatted output" begin # BROKEN: cintel/compute-runtime#635 #_, out = @grab_output @on_device oneAPI.@printf("") #@test out == "" _, out = @grab_output @on_device oneAPI.@printf("Testing...\n") @test out == "Testing...$endline" # narrow integer _, out = @grab_output @on_device oneAPI.@printf("Testing %d %d...\n", Int32(1), Int32(2)) @test out == "Testing 1 2...$endline" # wide integer _, out = @grab_output if Sys.iswindows() @on_device oneAPI.@printf("Testing %lld %lld...\n", Int64(1), Int64(2)) else @on_device oneAPI.@printf("Testing %ld %ld...\n", Int64(1), Int64(2)) end @test out == "Testing 1 2...$endline" _, out = @grab_output @on_device begin oneAPI.@printf("foo") oneAPI.@printf("bar\n") end @test out == "foobar$endline" # c argument promotions if float64_supported function kernel(A) oneAPI.@printf("%f %f\n", A[1], A[1]) return end x = oneArray(ones(Float64, 2, 2)) _, out = @grab_output begin @oneapi kernel(x) synchronize() end @test out == "1.000000 1.000000$endline" end end @testset "@print" begin # basic @print/@println _, out = @grab_output @on_device oneAPI.@print("Hello, World\n") @test out == "Hello, World$endline" _, out = @grab_output @on_device oneAPI.@println("Hello, World") @test out == "Hello, World$endline" # argument interpolation (by the macro, so can use literals) _, out = @grab_output @on_device oneAPI.@print("foobar") @test out == "foobar" _, out = @grab_output @on_device oneAPI.@print(:foobar) @test out == "foobar" _, out = @grab_output @on_device oneAPI.@print("foo", "bar") @test out == "foobar" _, out = @grab_output @on_device oneAPI.@print("foobar ", 42) @test out == "foobar 42" _, out = @grab_output @on_device oneAPI.@print("foobar $(42)") @test out == "foobar 42" _, out = @grab_output @on_device oneAPI.@print("foobar $(4)", 2) @test out == "foobar 42" _, out = @grab_output @on_device oneAPI.@print("foobar ", 4, "$(2)") @test out == "foobar 42" _, out = @grab_output @on_device oneAPI.@print(42) @test out == 
"42" _, out = @grab_output @on_device oneAPI.@print(4, 2) @test out == "42" # bug: @println failed to invokce @print with endline in the case of interpolation _, out = @grab_output @on_device oneAPI.@println("foobar $(42)") @test out == "foobar 42$endline" # argument types # we're testing the generated functions now, so can't use literals function test_output(val, str) canary = rand(Int32) # if we mess up the main arg, this one will print wrong _, out = @grab_output @on_device oneAPI.@print(val, " (", canary, ")") @test out == "$(str) ($(Int(canary)))" end for typ in (Int16, Int32, Int64, UInt16, UInt32, UInt64) test_output(typ(42), "42") end if float64_supported for typ in (Float32, Float64) test_output(typ(42), "42.000000") end end test_output(Cchar('c'), "c") for typ in (Ptr{Cvoid}, Ptr{Int}) ptr = convert(typ, Int(0x12345)) test_output(ptr, Sys.iswindows() ? "0000000000012345" : "0x12345") end test_output(true, "1") test_output(false, "0") # escaping kernel1(val) = (oneAPI.@print(val); nothing) _, out = @grab_output @on_device kernel1(42) @test out == "42" kernel2(val) = (oneAPI.@println(val); nothing) _, out = @grab_output @on_device kernel2(42) @test out == "42$endline" end float64_supported && @testset "@show" begin function kernel() seven_i32 = Int32(7) three_f64 = Float64(3) oneAPI.@show seven_i32 oneAPI.@show three_f64 1f0 + 4f0 return nothing end _, out = @grab_output @on_device kernel() @test out == "seven_i32 = 7$(endline)three_f64 = 3.000000$(endline)1.0f0 + 4.0f0 = 5.000000$(endline)" end ############################################################################################ # a composite type to test for more complex element types @eval struct RGB{T} r::T g::T b::T end @testset "local memory" begin n = 256 @testset "constructors" begin # static @on_device oneLocalArray(Float32, 1) @on_device oneLocalArray(Float32, (1,2)) @on_device oneLocalArray(Tuple{Float32, Float32}, 1) @on_device oneLocalArray(Tuple{Float32, Float32}, (1,2)) @on_device 
oneLocalArray(Tuple{RGB{Float32}, UInt32}, 1) @on_device oneLocalArray(Tuple{RGB{Float32}, UInt32}, (1,2)) end @testset "static" begin @testset "statically typed" begin function kernel(d, n) t = get_local_id() tr = n-t+1 s = oneLocalArray(Float32, 1024) s2 = oneLocalArray(Float32, 1024) # catch aliasing s[t] = d[t] s2[t] = 2*d[t] barrier(0) d[t] = s[tr] return end a = rand(Float32, n) d_a = oneArray(a) @oneapi items=n kernel(d_a, n) @test reverse(a) == Array(d_a) end @testset "parametrically typed" begin typs = [Int32, Int64, Float32] float64_supported && push!(typs, Float64) @testset for typ in typs function kernel(d::oneDeviceArray{T}, n) where {T} t = get_local_id() tr = n-t+1 s = oneLocalArray(T, 1024) s2 = oneLocalArray(T, 1024) # catch aliasing s[t] = d[t] s2[t] = d[t] barrier(0) d[t] = s[tr] return end a = rand(typ, n) d_a = oneArray(a) @oneapi items=n kernel(d_a, n) @test reverse(a) == Array(d_a) end end end end ############################################################################################ # @testset "atomics (low level)" begin @testset "atomic_add($T)" for T in [Int32, UInt32, Float32] if oneAPI.is_integrated() && T == Float32 continue end a = oneArray([zero(T)]) function kernel(a, b) oneAPI.atomic_add!(pointer(a), b) return end @oneapi items=256 kernel(a, one(T)) @test Array(a)[1] == T(256) end @testset "atomic_sub($T)" for T in [Int32, UInt32, Float32] if oneAPI.is_integrated() && T == Float32 continue end a = oneArray([T(256)]) function kernel(a, b) oneAPI.atomic_sub!(pointer(a), b) return end @oneapi items=256 kernel(a, one(T)) @test Array(a)[1] == T(0) end @testset "atomic_inc($T)" for T in [Int32, UInt32] a = oneArray([zero(T)]) function kernel(a) oneAPI.atomic_inc!(pointer(a)) return end @oneapi items=256 kernel(a) @test Array(a)[1] == T(256) end @testset "atomic_dec($T)" for T in [Int32, UInt32] a = oneArray([T(256)]) function kernel(a) oneAPI.atomic_dec!(pointer(a)) return end @oneapi items=256 kernel(a) @test Array(a)[1] == T(0) end 
@testset "atomic_min($T)" for T in [Int32, UInt32, Float32] if oneAPI.is_integrated() && T == Float32 continue end a = oneArray([T(256)]) function kernel(a, T) i = get_global_id() oneAPI.atomic_min!(pointer(a), T(i)) return end @oneapi items=256 kernel(a, T) @test Array(a)[1] == one(T) end @testset "atomic_max($T)" for T in [Int32, UInt32, Float32] if oneAPI.is_integrated() && T == Float32 continue end a = oneArray([zero(T)]) function kernel(a, T) i = get_global_id() oneAPI.atomic_max!(pointer(a), T(i)) return end @oneapi items=256 kernel(a, T) @test Array(a)[1] == T(256) end @testset "atomic_and($T)" for T in [Int32, UInt32] a = oneArray([T(1023)]) function kernel(a, T) i = get_global_id() - 1 k = 1 for i = 1:i k *= 2 end b = 1023 - k # 1023 - 2^i oneAPI.atomic_and!(pointer(a), T(b)) return end @oneapi items=10 kernel(a, T) @test Array(a)[1] == zero(T) end @testset "atomic_or($T)" for T in [Int32, UInt32] a = oneArray([zero(T)]) function kernel(a, T) i = get_global_id() b = 1 # 2^(i-1) for i = 1:i b *= 2 end b ÷= 2 oneAPI.atomic_or!(pointer(a), T(b)) return end @oneapi items=10 kernel(a, T) @test Array(a)[1] == T(1023) end @testset "atomic_xor($T)" for T in [Int32, UInt32] a = oneArray([T(1023)]) function kernel(a, T) i = get_global_id() b = 1 # 2^(i-1) for i = 1:i b *= 2 end b ÷= 2 oneAPI.atomic_xor!(pointer(a), T(b)) return end @oneapi items=10 kernel(a, T) @test Array(a)[1] == zero(T) end @testset "atomic_xchg($T)" for T in [Int32, UInt32, Float32] if oneAPI.is_integrated() && T == Float32 continue end a = oneArray([zero(T)]) function kernel(a, b) oneAPI.atomic_xchg!(pointer(a), b) return end @oneapi items=256 kernel(a, one(T)) @test Array(a)[1] == one(T) end # end ############################################################################################ @testset "atomics (high-level)" begin @testset "add" begin @testset for T in [Int32, UInt32, Float32] a = oneArray([zero(T)]) function kernel(T, a) oneAPI.@atomic a[1] = a[1] + 1 oneAPI.@atomic a[1] += 1 
return end @oneapi items=256 kernel(T, a) @test Array(a)[1] == 512 end end @testset "sub" begin @testset for T in [Int32, UInt32, Float32] a = oneArray(T[1024]) function kernel(T, a) oneAPI.@atomic a[1] = a[1] - 1 oneAPI.@atomic a[1] -= 1 return end @oneapi items=256 kernel(T, a) @test Array(a)[1] == 512 end end @testset "and" begin @testset for T in [Int32, UInt32] a = oneArray([~zero(T), ~zero(T)]) function kernel(T, a) i = get_local_id() mask = ~(T(1) << (i-1)) oneAPI.@atomic a[1] = a[1] & mask oneAPI.@atomic a[2] &= mask return end @oneapi items=8*sizeof(T) kernel(T, a) @test Array(a)[1] == zero(T) @test Array(a)[2] == zero(T) end end @testset "or" begin @testset for T in [Int32, UInt32] a = oneArray([zero(T), zero(T)]) function kernel(T, a) i = get_local_id() mask = T(1) << (i-1) oneAPI.@atomic a[1] = a[1] | mask oneAPI.@atomic a[2] |= mask return end @oneapi items=8*sizeof(T) kernel(T, a) @test Array(a)[1] == ~zero(T) @test Array(a)[2] == ~zero(T) end end @testset "xor" begin @testset for T in [Int32, UInt32] a = oneArray([zero(T), zero(T)]) function kernel(T, a) i = get_local_id() mask = T(1) << ((i-1)%(8*sizeof(T))) oneAPI.@atomic a[1] = a[1] ⊻ mask oneAPI.@atomic a[2] ⊻= mask return end nb = 4 @oneapi items=(8*sizeof(T)+nb) kernel(T, a) @test Array(a)[1] == ~zero(T) & ~((one(T) << nb) - one(T)) @test Array(a)[2] == ~zero(T) & ~((one(T) << nb) - one(T)) end end @testset "max" begin @testset for T in [Int32, UInt32, Float32] a = oneArray([zero(T)]) function kernel(T, a) i = get_local_id() oneAPI.@atomic a[1] = max(a[1], i) return end @oneapi items=32 kernel(T, a) @test Array(a)[1] == 32 end end @testset "min" begin @testset for T in [Int32, UInt32, Float32] a = oneArray([typemax(T)]) function kernel(T, a) i = get_local_id() oneAPI.@atomic a[1] = min(a[1], i) return end @oneapi items=32 kernel(T, a) @test Array(a)[1] == 1 end end @testset "mul" begin @testset for T in [Int32, UInt32, Float32] a = oneArray(T[1]) function kernel(T, a) oneAPI.@atomic a[1] = a[1] 
* 2 oneAPI.@atomic a[1] *= 2 return end @oneapi items=8 kernel(T, a) @test Array(a)[1] == 65536 end end @testset "div" begin @testset for T in [Int32, UInt32, Float32] a = oneArray(T[65536]) function kernel(T, a) oneAPI.@atomic a[1] = a[1] ÷ 2 oneAPI.@atomic a[1] ÷= 2 return end @oneapi items=8 kernel(T, a) @test Array(a)[1] == 1 end end @testset "macro" begin using oneAPI: AtomicError @test_throws AtomicError("right-hand side of an @atomic assignment should be a call") @macroexpand begin oneAPI.@atomic a[1] = 1 end @test_throws AtomicError("right-hand side of an @atomic assignment should be a call") @macroexpand begin oneAPI.@atomic a[1] = b ? 1 : 2 end @test_throws AtomicError("right-hand side of a non-inplace @atomic assignment should reference the left-hand side") @macroexpand begin oneAPI.@atomic a[1] = a[2] + 1 end @test_throws AtomicError("unknown @atomic expression") @macroexpand begin oneAPI.@atomic wat(a[1]) end @test_throws AtomicError("@atomic should be applied to an array reference expression") @macroexpand begin oneAPI.@atomic a = a + 1 end end end ================================================ FILE: test/dummy.ll ================================================ target datalayout = "e-p:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" target triple = "spir-unknown-unknown" ; Function Attrs: nounwind define spir_kernel void @foo() { entry: ret void } ; Function Attrs: nounwind define spir_kernel void @bar(i32 %a) #0 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 { entry: ret void } attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } !opencl.enable.FP_CONTRACT = !{} !opencl.spir.version = !{!6} !opencl.ocl.version = !{!6} !opencl.used.extensions = !{!7} 
!opencl.used.optional.core.features = !{!7}
!opencl.compiler.options = !{!7}

!1 = !{i32 1}
!2 = !{!"none"}
!3 = !{!"int"}
!4 = !{!"int"}
!5 = !{!""}
!6 = !{i32 1, i32 2}
!7 = !{}

================================================
FILE: test/dummy.spt
================================================
119734787 65536 393230 12 0
2 Capability Addresses
2 Capability Kernel
5 ExtInstImport 1 "OpenCL.std"
3 MemoryModel 1 2
4 EntryPoint 6 4 "foo"
4 EntryPoint 6 8 "bar"
9 String 11 "kernel_arg_type.bar.int,"
3 Source 3 102000
4 Name 5 "entry"
3 Name 9 "a"
4 Name 10 "entry"
4 TypeInt 6 32 0
2 TypeVoid 2
3 TypeFunction 3 2
4 TypeFunction 7 2 6
5 Function 2 4 0 3
2 Label 5
1 Return
1 FunctionEnd
5 Function 2 8 0 7
3 FunctionParameter 6 9
2 Label 10
1 Return
1 FunctionEnd

================================================
FILE: test/examples.jl
================================================
@testset "examples" begin
    # Recursively collect all `.jl` files under `path` into `sources`.
    # Returns the (mutated) `sources` vector.
    function find_sources(path::String, sources=String[])
        if isdir(path)
            for entry in readdir(path)
                find_sources(joinpath(path, entry), sources)
            end
        elseif endswith(path, ".jl")
            push!(sources, path)
        end
        sources
    end

    examples_dir = joinpath(@__DIR__, "..", "examples")
    examples = find_sources(examples_dir)
    # scripts can opt out of testing via a magic first line
    filter!(file -> readline(file) != "# EXCLUDE FROM TESTING", examples)
    # use relative paths so the testset names stay short
    examples = relpath.(examples, Ref(examples_dir))

    @testset for example in examples
        # run each example in a fresh Julia process, using the same project
        # environment as the test suite; forward its stderr for diagnostics
        cmd = `$(Base.julia_cmd()) --project=$(Base.active_project())`
        @test success(pipeline(`$cmd $(joinpath(examples_dir, example))`, stderr=stderr))
    end
end

================================================
FILE: test/execution.jl
================================================
import Adapt
using StaticArrays

# trivial no-op kernel used throughout these tests
dummy() = return

@testset "@oneapi" begin

@test_throws UndefVarError @oneapi undefined()
@test_throws MethodError @oneapi dummy(1)

@testset "low-level interface" begin
    # compile the kernel without launching, then launch manually
    k = zefunction(dummy)
    k()
    k(; items=1)
end

@testset "launch configuration" begin
    @oneapi dummy()

    items = 1
    @oneapi items dummy()
    @oneapi items=1 dummy()
@oneapi items=(1,1) dummy() @oneapi items=(1,1,1) dummy() groups = 1 @oneapi groups dummy() @oneapi groups=1 dummy() @oneapi groups=(1,1) dummy() @oneapi groups=(1,1,1) dummy() end @testset "launch=false" begin k = @oneapi launch=false dummy() k() k(; items=1) end @testset "inference" begin foo() = @oneapi dummy() @inferred foo() # with arguments, we call kernel_convert kernel(a) = return bar(a) = @oneapi kernel(a) @inferred bar(oneArray([1])) end @testset "reflection" begin oneAPI.code_lowered(dummy, Tuple{}) oneAPI.code_typed(dummy, Tuple{}) oneAPI.code_warntype(devnull, dummy, Tuple{}) oneAPI.code_llvm(devnull, dummy, Tuple{}) oneAPI.code_spirv(devnull, dummy, Tuple{}) @device_code_lowered @oneapi dummy() @device_code_typed @oneapi dummy() @device_code_warntype io=devnull @oneapi dummy() @device_code_llvm io=devnull @oneapi dummy() @device_code_spirv io=devnull @oneapi dummy() mktempdir() do dir @device_code dir=dir @oneapi dummy() end @test_throws ErrorException @device_code_lowered nothing # make sure kernel name aliases are preserved in the generated code @test occursin("dummy", sprint(io->(@device_code_llvm io=io optimize=false @oneapi dummy()))) @test occursin("dummy", sprint(io->(@device_code_llvm io=io @oneapi dummy()))) @test occursin("dummy", sprint(io->(@device_code_spirv io=io @oneapi dummy()))) # make sure invalid kernels can be partially reflected upon let invalid_kernel() = throw() @test_throws oneAPI.InvalidIRError @oneapi invalid_kernel() @test_throws oneAPI.InvalidIRError @grab_output @device_code_warntype @oneapi invalid_kernel() out, err = @grab_output begin try @device_code_warntype @oneapi invalid_kernel() catch end end @test occursin("Body::Union{}", err) end # set name of kernel @test occursin("mykernel", sprint(io->(@device_code_llvm io=io begin k = zefunction(dummy, name="mykernel") k() end))) @test oneAPI.return_type(identity, Tuple{Int}) === Int @test oneAPI.return_type(sin, Tuple{Float32}) === Float32 @test 
oneAPI.return_type(getindex, Tuple{oneDeviceArray{Float32,1,1},Int32}) === Float32 @test oneAPI.return_type(getindex, Tuple{Base.RefValue{Integer}}) === Integer end @testset "external kernels" begin @eval module KernelModule export external_dummy external_dummy() = return end import ...KernelModule @oneapi KernelModule.external_dummy() @eval begin using ...KernelModule @oneapi external_dummy() end @eval module WrapperModule using oneAPI @eval dummy() = return wrapper() = @oneapi dummy() end WrapperModule.wrapper() end @testset "calling device function" begin @noinline child(i) = sink(i) function parent() child(1) return end @oneapi parent() end @testset "varargs" begin function kernel(args...) oneAPI.@print(args[2]) return end _, out = @grab_output begin @oneapi kernel(1, 2, 3) synchronize() end @test out == "2" end end ############################################################################################ @testset "argument passing" begin dims = (16, 16) len = prod(dims) @testset "manually allocated" begin function kernel(input, output) i = get_global_id() val = input[i] output[i] = val return end input = round.(rand(Float32, dims) * 100) output = similar(input) input_dev = oneArray(input) output_dev = oneArray(output) @oneapi items=len kernel(input_dev, output_dev) @test input ≈ Array(output_dev) end @testset "scalar through single-value array" begin function kernel(a, x) i = get_global_id() max = get_global_size() if i == max _val = a[i] x[] = _val end return end arr = round.(rand(Float32, dims) * 100) val = [0f0] arr_dev = oneArray(arr) val_dev = oneArray(val) @oneapi items=len kernel(arr_dev, val_dev) @test arr[dims...] 
≈ Array(val_dev)[1] end @testset "scalar through single-value array, using device function" begin @noinline child(a, i) = a[i] function parent(a, x) i = get_global_id() max = get_global_size() if i == max _val = child(a, i) x[] = _val end return end arr = round.(rand(Float32, dims) * 100) val = [0f0] arr_dev = oneArray(arr) val_dev = oneArray(val) @oneapi items=len parent(arr_dev, val_dev) @test arr[dims...] ≈ Array(val_dev)[1] end @testset "tuples" begin # issue #7: tuples not passed by pointer function kernel(keeps, out) if keeps[1] out[] = 1 else out[] = 2 end return end keeps = (true,) d_out = oneArray(zeros(Int)) @oneapi kernel(keeps, d_out) @test Array(d_out)[] == 1 end @testset "ghost function parameters" begin # bug: ghost type function parameters are elided by the compiler len = 60 a = rand(Float32, len) b = rand(Float32, len) c = similar(a) d_a = oneArray(a) d_b = oneArray(b) d_c = oneArray(c) @eval struct ExecGhost end function kernel(ghost, a, b, c) i = get_global_id() c[i] = a[i] + b[i] return end @oneapi items=len kernel(ExecGhost(), d_a, d_b, d_c) @test a+b == Array(d_c) # bug: ghost type function parameters confused aggregate type rewriting function kernel(ghost, out, aggregate) i = get_global_id() out[i] = aggregate[1] return end @oneapi items=len kernel(ExecGhost(), d_c, (42,)) @test all(val->val==42, Array(d_c)) end @testset "immutables" begin # issue #15: immutables not passed by pointer function kernel(ptr, b) ptr[] = imag(b) return end arr = oneArray(zeros(Float32)) x = ComplexF32(2,2) @oneapi kernel(arr, x) @test Array(arr)[] == imag(x) end @testset "automatic recompilation" begin arr = oneArray(zeros(Int)) function kernel(ptr) ptr[] = 1 return end @oneapi kernel(arr) @test Array(arr)[] == 1 function kernel2(ptr) ptr[] = 2 return end @oneapi kernel2(arr) @test Array(arr)[] == 2 end @testset "automatic recompilation (bis)" begin arr = oneArray(zeros(Int)) @eval doit(ptr) = ptr[] = 1 function kernel(ptr) doit(ptr) return end @oneapi kernel(arr) 
@test Array(arr)[] == 1 @eval doit(ptr) = ptr[] = 2 @oneapi kernel(arr) @test Array(arr)[] == 2 end @testset "non-isbits arguments" begin function kernel1(T, i) sink(i) return end @oneapi kernel1(Int, 1) function kernel2(T, i) sink(unsafe_trunc(T,i)) return end @oneapi kernel2(Int, 1f0) end @testset "splatting" begin function kernel(out, a, b) out[] = a+b return end out = [0] out_dev = oneArray(out) @oneapi kernel(out_dev, 1, 2) @test Array(out_dev)[1] == 3 all_splat = (out_dev, 3, 4) @oneapi kernel(all_splat...) @test Array(out_dev)[1] == 7 partial_splat = (5, 6) @oneapi kernel(out_dev, partial_splat...) @test Array(out_dev)[1] == 11 end @testset "object invoke" begin # this mimics what is generated by closure conversion @eval struct KernelObject{T} <: Function val::T end function (self::KernelObject)(a) a[] = self.val return end function outer(a, val) inner = KernelObject(val) @oneapi inner(a) end a = [1f0] a_dev = oneArray(a) outer(a_dev, 2f0) @test Array(a_dev) ≈ [2f0] end @testset "closures" begin function outer(a_dev, val) function inner(a) # captures `val` a[] = val return end @oneapi inner(a_dev) end a = [1f0] a_dev = oneArray(a) outer(a_dev, 2f0) @test Array(a_dev) ≈ [2f0] end @testset "conversions" begin @eval struct Host end @eval struct Device end Adapt.adapt_storage(::oneAPI.KernelAdaptor, a::Host) = Device() Base.convert(::Type{Int}, ::Host) = 1 Base.convert(::Type{Int}, ::Device) = 2 out = [0] # convert arguments out_dev = oneArray(out) let arg = Host() @test Array(out_dev) ≈ [0] function kernel(arg, out) out[] = convert(Int, arg) return end @oneapi kernel(arg, out_dev) @test Array(out_dev) ≈ [2] end # convert tuples out_dev = oneArray(out) let arg = (Host(),) @test Array(out_dev) ≈ [0] function kernel(arg, out) out[] = convert(Int, arg[1]) return end @oneapi kernel(arg, out_dev) @test Array(out_dev) ≈ [2] end # convert named tuples out_dev = oneArray(out) let arg = (a=Host(),) @test Array(out_dev) ≈ [0] function kernel(arg, out) out[] = convert(Int, 
arg.a) return end @oneapi kernel(arg, out_dev) @test Array(out_dev) ≈ [2] end # don't convert structs out_dev = oneArray(out) @eval struct Nested a::Host end let arg = Nested(Host()) @test Array(out_dev) ≈ [0] function kernel(arg, out) out[] = convert(Int, arg.a) return end @oneapi kernel(arg, out_dev) @test Array(out_dev) ≈ [1] end end @testset "argument count" begin val = [0] val_dev = oneArray(val) for i in (1, 10, 20, 34) variables = ('a':'z'..., 'A':'Z'...) params = [Symbol(variables[j]) for j in 1:i] # generate a kernel body = quote function kernel(arr, $(params...)) arr[] = $(Expr(:call, :+, params...)) return end end eval(body) args = [j for j in 1:i] call = Expr(:call, :kernel, val_dev, args...) cudacall = :(@oneapi $call) eval(cudacall) @test Array(val_dev)[1] == sum(args) end end @testset "keyword arguments" begin @eval inner_kwargf(foobar;foo=1, bar=2) = nothing @oneapi (()->inner_kwargf(42;foo=1,bar=2))() @oneapi (()->inner_kwargf(42))() @oneapi (()->inner_kwargf(42;foo=1))() @oneapi (()->inner_kwargf(42;bar=2))() @oneapi (()->inner_kwargf(42;bar=2,foo=1))() end @testset "captured values" begin function f(capture::T) where {T} function kernel(ptr) ptr[] = capture return end arr = oneArray(zeros(T)) @oneapi kernel(arr) return Array(arr)[1] end using Test @test f(1) == 1 @test f(2) == 2 end end ############################################################################################ @testset "#55: invalid integers created by alloc_opt" begin function f(a) x = SVector(0f0, 0f0) v = MVector{3, Float32}(undef) for (i,_) in enumerate(x) v[i] = 1f0 end a[1] = v[1] return nothing end @oneapi f(oneArray(zeros(Float32, 1))) end @testset "#160: barrier intrinsincs should be convergent" begin # Solve L*x = r and store the result in r. 
function cpu(n::Int, r::Vector{Float32}) for j=1:n temp = r[j]/2f0 for k=j+1:n r[k] = r[k] - 2f0*temp end r[j] = temp end end function gpu(::Val{n},r_) where {n} tx = get_local_id() bx = get_group_id() r = oneLocalArray(Float32, n) r[tx] = r_[tx] barrier(0) for j=1:n if tx == 1 r[j] = r[j] / 2f0 end barrier(0) if tx > j && tx <= 4 r[tx] = r[tx] - 2f0*r[j] end barrier(0) end if bx == 1 r_[tx] = r[tx] end return end A = Float32[10, 10] n = length(A) hA = copy(A) cpu(n,hA) dA = oneArray(A) @oneapi items=n gpu(Val(n),dA) @test Array(dA) == hA end @testset "NEO#172" begin # conversions from integers to pointers resulted in lost memory stores function kernel(ptr) ptr = reinterpret(Core.LLVMPtr{Float32, AS.CrossWorkgroup}, ptr) unsafe_store!(ptr, 42) return end if VERSION < v"1.12" a = oneArray(Float32[0]) @oneapi kernel(pointer(a)) @test Array(a) == [42] else @test_broken false end end ############################################################################################ ================================================ FILE: test/fft.jl ================================================ using Test using oneAPI using oneAPI.oneMKL.FFT using AbstractFFTs using FFTW using Random Random.seed!(1234) # Helper to move data to GPU gpu(A::AbstractArray{T}) where T = oneAPI.oneArray{T}(A) struct _Plan end struct _FFT end const MYRTOL = 1e-5 const MYATOL = 1e-8 function cmp(a,b; rtol=MYRTOL, atol=MYATOL) @test isapprox(Array(a), Array(b); rtol=rtol, atol=atol) end function test_plan(::_Plan, plan, X::AbstractArray{T,N}) where {T,N} p = plan(X) Y = p * X return Y end function test_plan(::_FFT, f, X::AbstractArray{T,N}) where {T,N} Y = if f === AbstractFFTs.irfft || f === AbstractFFTs.brfft f(X, size(X, ndims(X))*2 - 2) else f(X) end return Y end function test_plan(t, plan::Function, dim::Tuple, T::Type, iplan=nothing) X = rand(T, dim) dX = gpu(X) Y = test_plan(t, plan, X) dY = test_plan(t, plan, dX) cmp(dY, Y) if iplan !== nothing iX = test_plan(t, iplan, Y) idX = test_plan(t, 
iplan, dY)
        cmp(idX, iX)
    end
end

@testset "FFT" begin
    @testset "$(length(dim))D" for dim in [(8,), (8,32), (8,32,64)]
        # planned transforms, single precision
        test_plan(_Plan(), AbstractFFTs.plan_fft, dim, ComplexF32, AbstractFFTs.plan_ifft)
        test_plan(_Plan(), AbstractFFTs.plan_fft, dim, ComplexF32, AbstractFFTs.plan_bfft)
        test_plan(_Plan(), AbstractFFTs.plan_fft, dim, Float32, AbstractFFTs.plan_ifft)
        test_plan(_Plan(), AbstractFFTs.plan_fft, dim, Float32, AbstractFFTs.plan_bfft)
        test_plan(_Plan(), AbstractFFTs.plan_rfft, dim, Float32)
        test_plan(_Plan(), AbstractFFTs.plan_fft!, dim, ComplexF32, AbstractFFTs.plan_bfft!)
        # Not part of FFTW
        # test_plan(AbstractFFTs.plan_rfft!, Float32)

        # direct transforms, single precision
        test_plan(_FFT(), AbstractFFTs.fft, dim, ComplexF32, AbstractFFTs.ifft)
        test_plan(_FFT(), AbstractFFTs.fft, dim, ComplexF32, AbstractFFTs.bfft)
        if length(dim) == 1
            # irfft/brfft only for 1D
            test_plan(_FFT(), AbstractFFTs.rfft, dim, Float32, AbstractFFTs.irfft)
            test_plan(_FFT(), AbstractFFTs.rfft, dim, Float32, AbstractFFTs.brfft)
        end

        # double precision, only when the device supports Float64
        if (ComplexF64 in eltypes) && (Float64 in eltypes)
            test_plan(_Plan(), AbstractFFTs.plan_fft, dim, ComplexF64, AbstractFFTs.plan_ifft)
            test_plan(_Plan(), AbstractFFTs.plan_fft, dim, ComplexF64, AbstractFFTs.plan_bfft)
            test_plan(_Plan(), AbstractFFTs.plan_fft, dim, Float64, AbstractFFTs.plan_ifft)
            test_plan(_Plan(), AbstractFFTs.plan_fft, dim, Float64, AbstractFFTs.plan_bfft)
            test_plan(_Plan(), AbstractFFTs.plan_rfft, dim, Float64)
            test_plan(_Plan(), AbstractFFTs.plan_fft!, dim, ComplexF64, AbstractFFTs.plan_bfft!)
# Not part of FFTW # test_plan(AbstractFFTs.plan_rfft!, Float64) test_plan(_FFT(), AbstractFFTs.fft, dim, ComplexF64, AbstractFFTs.ifft) test_plan(_FFT(), AbstractFFTs.fft, dim, ComplexF64, AbstractFFTs.bfft) if length(dim) == 1 # irfft/brfft only for 1D test_plan(_FFT(), AbstractFFTs.rfft, dim, Float64, AbstractFFTs.irfft) test_plan(_FFT(), AbstractFFTs.rfft, dim, Float64, AbstractFFTs.brfft) end end end end ================================================ FILE: test/indexing.jl ================================================ using Test using oneAPI @testset "findall" begin bools1d = oneArray([true, false, true, false, true]) @test Array(findall(bools1d)) == findall(Bool[true, false, true, false, true]) bools2d = oneArray(Bool[true false; false true; true false]) @test Array(findall(bools2d)) == findall(Bool[true false; false true; true false]) all_false = oneArray(fill(false, 4)) @test Array(findall(all_false)) == Int[] all_true = oneArray(fill(true, 3, 2)) @test Array(findall(all_true)) == findall(fill(true, 3, 2)) data = oneArray(collect(1:6)) mask = oneArray(Bool[true, false, true, false, false, true]) @test Array(data[mask]) == collect(1:6)[findall(Bool[true, false, true, false, false, true])] # Test with array larger than 1024 to trigger multiple groups large_size = 2048 large_mask = oneArray(rand(Bool, large_size)) large_result_gpu = Array(findall(large_mask)) large_result_cpu = findall(Array(large_mask)) @test large_result_gpu == large_result_cpu # Test with even larger array to ensure robustness very_large_size = 5000 very_large_mask = oneArray(fill(true, very_large_size)) # all true for predictable result very_large_result_gpu = Array(findall(very_large_mask)) very_large_result_cpu = findall(fill(true, very_large_size)) @test very_large_result_gpu == very_large_result_cpu end @testset "CartesianIndices with mapreduce" begin # Test for bug fix: mapreduce with CartesianIndices and tuple reduction # Previously failed due to SPIR-V codegen issues with 
nested insertvalue instructions # when combining tuples of (bool, CartesianIndex) in reduction operations. # The fix involved properly handling nested struct insertions in SPIR-V codegen. # Test that we can zip CartesianIndices with array values in a mapreduce # This tests the fix for nested tuple operations in SPIR-V codegen # Simple test: sum of values while tracking indices x = oneArray(ones(Int, 2, 2)) indices = CartesianIndices((2, 2)) # Map to tuple of (value, index), then reduce by summing the values result = mapreduce(tuple, (t1, t2) -> (t1[1] + t2[1], t1[2]), x, indices; init = (0, CartesianIndex(0, 0))) @test result[1] == 4 # sum of four 1s # Test with 1D array y = oneArray(ones(Int, 4)) indices_1d = CartesianIndices((4,)) result_1d = mapreduce(tuple, (t1, t2) -> (t1[1] + t2[1], t1[2]), y, indices_1d; init = (0, CartesianIndex(0,))) @test result_1d[1] == 4 # Test with boolean array and index comparison (closer to original failure case) # This pattern is similar to what findfirst would use internally z = oneArray([false, true, false, true]) indices_z = CartesianIndices((4,)) result_z = mapreduce(tuple, (t1, t2) -> begin (found1, idx1), (found2, idx2) = t1, t2 # Return the first found index (smallest index if both found) if found1 return (found1, idx1) else return (found2, idx2) end end, z, indices_z; init = (false, CartesianIndex(0,))) @test result_z[1] == true # Found a true value @test result_z[2] == CartesianIndex(2,) # First true is at index 2 end ================================================ FILE: test/kernelabstractions.jl ================================================ import KernelAbstractions include(joinpath(dirname(pathof(KernelAbstractions)), "..", "test", "testsuite.jl")) skip_tests=Set([ "sparse", "Convert", # Need to opt out of i128 ]) Testsuite.testsuite(oneAPIBackend, "oneAPI", oneAPI, oneArray, oneDeviceArray; skip_tests) ================================================ FILE: test/level-zero.jl 
================================================ using oneAPI.oneL0 # ensure that the driver we loaded is a versioned library, matching the Level Zero loader. # otherwise we risk loading multiple drivers, e.g., if a system driver is available. if oneL0.NEO_jll.is_available() @test endswith(oneL0.NEO_jll.libze_intel_gpu, ".1") end @testset "driver" begin drvs = drivers() @assert !isempty(drvs) drv = first(drvs) @test drv == drvs[1] show(devnull, drv) show(devnull, MIME("text/plain"), drv) api_version(drv) properties(drv) ipc_properties(drv) extension_properties(drv) end drv = first(drivers()) @testset "device" begin devs = devices(drv) @assert !isempty(devs) dev = first(devs) @test dev == devs[1] show(devnull, dev) show(devnull, MIME("text/plain"), dev) @test collect(devices()) == collect(devices(drv)) @test device!(dev) == dev properties(dev) compute_properties(dev) module_properties(dev) memory_properties(dev) memory_access_properties(dev) cache_properties(dev) image_properties(dev) p2p_properties(dev, dev) end dev = first(devices(drv)) @testset "context" begin ctx = ZeContext(drv) show(devnull, ctx) #status(ctx) end ctx = ZeContext(drv) @testset "command" begin groups = command_queue_groups(dev) @test !isempty(groups) groups = compute_groups(dev) group = first(groups) queue = ZeCommandQueue(ctx, dev, group.ordinal) list = ZeCommandList(ctx, dev, group.ordinal) close(list) execute!(queue, [list]) synchronize(queue) reset(list) list = ZeCommandList(ctx, dev, group.ordinal) do list @test list isa ZeCommandList end execute!(queue) do list @test list isa ZeCommandList end end group = first(compute_groups(dev)) queue = ZeCommandQueue(ctx, dev, group.ordinal) @testset "fence" begin fence = ZeFence(queue) @test !Base.isdone(fence) execute!(queue, fence) do list # do nothing, but signal the fence on completion end wait(fence) @test Base.isdone(fence) reset(fence) @test !Base.isdone(fence) end @testset "event" begin ZeEventPool(ctx, 1) ZeEventPool(ctx, 1, dev) pool = 
ZeEventPool(ctx, 1) event = pool[1] @test !Base.isdone(event) signal(event) ZeCommandList(ctx, dev, group.ordinal) do list append_signal!(list, event) end @test Base.isdone(event) wait(event, 1) ZeCommandList(ctx, dev, group.ordinal) do list append_wait!(list, event) end reset(event) ZeCommandList(ctx, dev, group.ordinal) do list append_reset!(list, event) end # timed_pool = ZeEventPool(ctx, 1; flags=oneL0.ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP) # timed_event = timed_pool[1] # @test kernel_timestamp(timed_event).global.start == nothing # @test kernel_timestamp(timed_event).context.start == nothing # signal(timed_event) # FIXME: A kernel timestamp event can only be signaled from zeCommandListAppendLaunchKernel et al. functions # @test kernel_timestamp(timed_event).global.start != nothing # @test kernel_timestamp(timed_event).context.start != nothing end @testset "barrier" begin pool = ZeEventPool(ctx, 1) event = pool[1] ZeCommandList(ctx, dev, group.ordinal) do list append_barrier!(list) append_barrier!(list, event) append_barrier!(list, event, event) end #device_barrier(dev) # unsupported end @testset "module" begin data = read(joinpath(@__DIR__, "dummy.spv")) mod = ZeModule(ctx, dev, data) @test length(kernels(mod)) == 2 @test haskey(kernels(mod), "foo") @test !haskey(kernels(mod), "baz") kernel = kernels(mod)["foo"] suggest_groupsize(kernel, 1024) groupsize!(kernel, 1) groupsize!(kernel, (1,)) groupsize!(kernel, (1, 1)) groupsize!(kernel, (1, 1, 1)) kernel = kernels(mod)["bar"] arguments(kernel)[1] = Int32(42) @test indirect_access(kernel) == 0 indirect_access!(kernel, oneL0.ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE) @test indirect_access(kernel) == oneL0.ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE if !parameter_validation # oneapi-src/level-zero#55 attrs = source_attributes(kernel) @test attrs isa Vector{<:AbstractString} end props = properties(kernel) @test props.numKernelArgs == 1 @test props.requiredGroupSize isa oneL0.ZeDim3 @testset "kernel execution" begin 
ZeCommandList(ctx, dev, group.ordinal) do list append_launch!(list, kernel, 1) end pool = ZeEventPool(ctx, 2) signal_event = pool[1] wait_event = pool[2] execute!(queue) do list append_launch!(list, kernel, 1, signal_event, wait_event) end @test !Base.isdone(signal_event) signal(wait_event) synchronize(queue) @test Base.isdone(signal_event) end end @testset "memory" begin buf = device_alloc(ctx, dev, 1024) props = properties(buf) @test props.device == dev @test props.type == oneL0.ZE_MEMORY_TYPE_DEVICE @test_throws ArgumentError convert(Ptr{Cvoid}, buf) ptr = convert(ZePtr{Cvoid}, buf) @test lookup_alloc(ctx, ptr) isa typeof(buf) free(buf) buf = host_alloc(ctx, 1024) props = properties(buf) @test props.type == oneL0.ZE_MEMORY_TYPE_HOST ptr = convert(ZePtr{Cvoid}, buf) @test lookup_alloc(ctx, ptr) isa typeof(buf) ptr = convert(Ptr{Cvoid}, buf) @test lookup_alloc(ctx, ptr) isa typeof(buf) free(buf) buf = shared_alloc(ctx, dev, 1024) props = properties(buf) @test props.type == oneL0.ZE_MEMORY_TYPE_SHARED ptr = convert(ZePtr{Cvoid}, buf) @test lookup_alloc(ctx, ptr) isa typeof(buf) ptr = convert(Ptr{Cvoid}, buf) @test lookup_alloc(ctx, ptr) isa typeof(buf) free(buf) end @testset "copy" begin let src = rand(Int, 1024) chk = ones(Int, length(src)) dst = device_alloc(ctx, dev, sizeof(src)) execute!(queue) do list append_copy!(list, pointer(dst), pointer(src), sizeof(src)) append_barrier!(list) append_copy!(list, pointer(chk), pointer(dst), sizeof(src)) end synchronize(queue) @test chk == src # FIX: Allocate pattern in USM Host Memory # Standard Host memory (stack/heap) is not accessible by discrete GPUs for fill patterns. # We must use USM Host Memory. 
pattern_val = 42
            # Allocate the fill pattern in USM host memory: plain host (stack/heap)
            # memory is not accessible by discrete GPUs for fill patterns.
            pattern_buf = oneL0.host_alloc(ctx, sizeof(Int), Base.datatype_alignment(Int))
            unsafe_store!(convert(Ptr{Int}, pattern_buf), pattern_val)

            execute!(queue) do list
                # Use the USM pointer (converted to ZePtr)
                append_fill!(list, pointer(dst), convert(ZePtr{Int}, pattern_buf),
                             sizeof(Int), sizeof(src))
                append_barrier!(list)
                append_copy!(list, pointer(chk), pointer(dst), sizeof(src))
            end
            synchronize(queue)
            oneL0.free(pattern_buf)
            # compare against `pattern_val` rather than a duplicated literal
            @test all(isequal(pattern_val), chk)

            free(dst)
        end

        for buf in [device_alloc(ctx, dev, 1024),
                    host_alloc(ctx, 1024),
                    shared_alloc(ctx, dev, 1024)]
            execute!(queue) do list
                append_prefetch!(list, pointer(buf), sizeof(buf))
                append_advise!(list, dev, pointer(buf), sizeof(buf),
                               oneL0.ZE_MEMORY_ADVICE_SET_READ_MOSTLY)
            end
            free(buf)
        end
    end

@testset "residency" begin
    for buf in [device_alloc(ctx, dev, 1024),
                host_alloc(ctx, 1024),
                shared_alloc(ctx, dev, 1024)]
        make_resident(ctx, dev, buf)
        evict(ctx, dev, buf)
        make_resident(ctx, dev, buf, 1024)
        evict(ctx, dev, buf, 1024)
        free(buf)
    end
end

================================================
FILE: test/onemkl.jl
================================================
if Sys.iswindows()
# typo fixed: "oneKML" -> "oneMKL"
@warn "Skipping unsupported oneMKL tests"
else

using oneAPI
using oneAPI.oneMKL: band, bandex, oneSparseMatrixCSR, oneSparseMatrixCOO, oneSparseMatrixCSC

using SparseArrays
using LinearAlgebra

m = 20
n = 35
k = 13

@testset "Version" begin
    version_onemkl = oneMKL.version()
    @test version_onemkl ≥ v"2025.2.0"
end

############################################################################################
@testset "level 1" begin
    @testset for T in intersect(eltypes, [Float32, Float64, ComplexF32, ComplexF64])
        @testset "copy" begin
            A = oneArray(rand(T, m))
            B = oneArray{T}(undef, m)
            oneMKL.copy!(m, A, B)
            @test Array(A) == Array(B)
        end

        @testset "axpy" begin
            alpha = rand(T)
            @test testf(axpy!, alpha, rand(T,m), rand(T,m))
        end

        @testset "axpby" begin
            alpha = rand(T)
            beta = rand(T)
            @test testf(axpby!, alpha, rand(T,m), beta, rand(T,m))
        end
@testset "rotate" begin @test testf(rotate!, rand(T, m), rand(T, m), rand(real(T)), rand(real(T))) @test testf(rotate!, rand(T, m), rand(T, m), rand(real(T)), rand(T)) end @testset "reflect" begin @test testf(reflect!, rand(T, m), rand(T, m), rand(real(T)), rand(real(T))) @test testf(reflect!, rand(T, m), rand(T, m), rand(real(T)), rand(T)) end @testset "scal" begin # Test scal primitive [alpha/x: F32, F64, CF32, CF64] alpha = rand(T,1) @test testf(rmul!, rand(T,m), alpha[1]) # Test scal primitive [alpha - F32, F64, x - CF32, CF64] A = rand(T,m) gpuA = oneArray(A) if T === ComplexF32 alphaf32 = rand(Float32, 1) oneMKL.scal!(m, alphaf32[1], gpuA) @test Array(A .* alphaf32[1]) ≈ Array(gpuA) end if T === ComplexF64 alphaf64 = rand(Float64, 1) oneMKL.scal!(m, alphaf64[1], gpuA) @test Array(A .* alphaf64[1]) ≈ Array(gpuA) end end @testset "nrm2" begin @test testf(norm, rand(T,m)) end @testset "iamax/iamin" begin a = convert.(T, [1.0, 2.0, -0.8, 5.0, 3.0]) ca = oneArray(a) @test BLAS.iamax(a) == oneMKL.iamax(ca) @test oneMKL.iamin(ca) == 3 end @testset "swap" begin x = rand(T, m) y = rand(T, m) dx = oneArray(x) dy = oneArray(y) oneMKL.swap!(m, dx, dy) @test Array(dx) == y @test Array(dy) == x end @testset "dot" begin @test testf(dot, rand(T,m), rand(T,m)) if T == ComplexF32 || T == ComplexF64 @test testf(oneMKL.dotu, m, oneArray(rand(T,m)), oneArray(rand(T,m))) end end @testset "asum" begin @test testf(BLAS.asum, rand(T,m)) end end @testset for T in [Float16, ComplexF16] alpha = rand(T,1) A = oneArray(rand(T, m)) B = oneArray{T}(undef, m) oneMKL.copy!(m,A,B) @test Array(A) == Array(B) @test testf(axpy!, alpha[1], rand(T,m), rand(T,m)) @test testf(norm, rand(T,m)) @test testf(dot, rand(T, m), rand(T, m)) @test testf(*, transpose(rand(T, m)), rand(T,m)) @test testf(*, rand(T, m)', rand(T,m)) @test testf(rmul!, rand(T,m), alpha[1]) if T <: ComplexF16 @test testf(dot, rand(T, m), rand(T, m)) x = rand(T, m) y = rand(T, m) dx = oneArray(x) dy = oneArray(y) dz = dot(dx, dy) z = 
dot(x, y) @test dz ≈ z end end end @testset "level 2" begin @testset for T in intersect(eltypes, [Float32, Float64, ComplexF32, ComplexF64]) alpha = rand(T) beta = rand(T) @testset "gemv" begin @test testf(*, rand(T, m, n), rand(T, n)) @test testf(*, transpose(rand(T, m, n)), rand(T, m)) @test testf(*, rand(T, m, n)', rand(T, m)) x = rand(T, m) A = rand(T, m, m + 1 ) y = rand(T, m) dx = oneArray(x) dA = oneArray(A) dy = oneArray(y) @test_throws DimensionMismatch mul!(dy, dA, dx) A = rand(T, m + 1, m ) dA = oneArray(A) @test_throws DimensionMismatch mul!(dy, dA, dx) x = rand(T, m) A = rand(T, n, m) dx = oneArray(x) dA = oneArray(A) alpha = rand(T) dy = oneMKL.gemv('N', alpha, dA, dx) hy = collect(dy) @test hy ≈ alpha * A * x dy = oneMKL.gemv('N', dA, dx) hy = collect(dy) @test hy ≈ A * x end @testset "banded methods" begin # bands ku = 2 kl = 3 # generate banded matrix A = rand(T, m,n) A = bandex(A, kl, ku) # get packed format Ab = band(A, kl, ku) d_Ab = oneArray(Ab) x = rand(T, n) d_x = oneArray(x) synchronize() @testset "gbmv!" 
begin # Test: y = alpha * A * x + beta * y y = rand(T, m) d_y = oneArray(y) synchronize() oneMKL.gbmv!('N', m, kl, ku, alpha, d_Ab, d_x, beta, d_y) BLAS.gbmv!('N', m, kl, ku, alpha, Ab, x, beta, y) h_y = Array(d_y) @test y ≈ h_y # Test: y = alpha * transpose(A) * x + beta * y x = rand(T, n) d_x = oneArray(x) y = rand(T,m) d_y = oneArray(y) synchronize() oneMKL.gbmv!('T', m, kl, ku, alpha, d_Ab, d_y, beta, d_x) BLAS.gbmv!('T', m, kl, ku, alpha, Ab, y, beta, x) h_x = Array(d_x) @test x ≈ h_x # Test: y = alpha * A'*x + beta * y x = rand(T,n) d_x = oneArray(x) y = rand(T,m) d_y = oneArray(y) synchronize() oneMKL.gbmv!('C', m, kl, ku, alpha, d_Ab, d_y, beta, d_x) BLAS.gbmv!('C', m, kl, ku, alpha, Ab, y, beta, x) h_x = Array(d_x) @test x ≈ h_x # Test: alpha=1 version without y d_y = oneMKL.gbmv('N', m, kl, ku, d_Ab, d_x) y = BLAS.gbmv('N', m, kl, ku, Ab, x) h_y = Array(d_y) @test y ≈ h_y end @testset "gbmv" begin # test y = alpha*A*x x = rand(T,n) d_x = oneArray(x) d_y = oneMKL.gbmv('N', m, kl, ku, alpha, d_Ab, d_x) y = zeros(T,m) y = BLAS.gbmv('N',m,kl,ku,alpha,Ab,x) h_y = Array(d_y) @test y ≈ h_y end A = rand(T,m,m) A = A + A' nbands = 3 @test m >= 1+nbands A = bandex(A,nbands,nbands) # convert to 'upper' banded storage format AB = band(A,0,nbands) # construct x x = rand(T,m) d_AB = oneArray(AB) d_x = oneArray(x) if T <:Union{ComplexF32,ComplexF64} @testset "hbmv!" begin y = rand(T,m) d_y = oneArray(y) # hbmv! oneMKL.hbmv!('U',nbands,alpha,d_AB,d_x,beta,d_y) y = alpha*(A*x) + beta*y # compare h_y = Array(d_y) @test y ≈ h_y end @testset "hbmv" begin d_y = oneMKL.hbmv('U',nbands,d_AB,d_x) y = A*x # compare h_y = Array(d_y) @test y ≈ h_y end else @testset "sbmv!" begin y = rand(T,m) d_y = oneArray(y) # sbmv! 
oneMKL.sbmv!('U',nbands,alpha,d_AB,d_x,beta,d_y) y = alpha*(A*x) + beta*y # compare h_y = Array(d_y) @test y ≈ h_y end @testset "sbmv" begin d_y = oneMKL.sbmv('U',nbands,d_AB,d_x) y = A*x # compare h_y = Array(d_y) @test y ≈ h_y end end # generate triangular matrix A = rand(T,m,m) # restrict to 3 bands nbands = 3 @test m >= 1+nbands A = bandex(A,0,nbands) # convert to 'upper' banded storage format AB = band(A,0,nbands) d_AB = oneArray(AB) @testset "tbmv!" begin y = rand(T, m) # move to host d_y = oneArray(y) # tbmv! oneMKL.tbmv!('U','N','N',nbands,d_AB,d_y) y = A*y # compare h_y = Array(d_y) @test y ≈ h_y end @testset "tbmv" begin # tbmv d_y = oneMKL.tbmv('U','N','N',nbands,d_AB,d_x) y = A*x # compare h_y = Array(d_y) @test y ≈ h_y end end @testset "ger!" begin A = rand(T,m,m) x = rand(T,m) y = rand(T,m) dA = oneArray(A) dx = oneArray(x) dy = oneArray(y) # perform rank one update dB = copy(dA) oneMKL.ger!(alpha,dx,dy,dB) B = (alpha*x)*y' + A # move to host and compare hB = Array(dB) @test B ≈ hB end @testset "Triangular" begin @testset "trmv!" begin sA = rand(T,m,m) sA = sA + transpose(sA) A = triu(sA) dA = oneArray(A) x = rand(T, m) dx = oneArray(x) d_y = copy(dx) # execute trmv! oneMKL.trmv!('U','N','N',dA,d_y) y = A*x # compare h_y = Array(d_y) @test y ≈ h_y end @testset "trmv" begin sA = rand(T,m,m) sA = sA + transpose(sA) A = triu(sA) dA = oneArray(A) x = rand(T, m) dx = oneArray(x) d_y = copy(dx) d_y = oneMKL.trmv('U','N','N',dA,dx) y = A*x # compare h_y = Array(d_y) @test y ≈ h_y end @testset "trsv!" begin sA = rand(T,m,m) sA = sA + transpose(sA) A = triu(sA) dA = oneArray(A) x = rand(T, m) dx = oneArray(x) d_y = copy(dx) # execute trsv! 
oneMKL.trsv!('U','N','N',dA,d_y) y = A\x # compare h_y = Array(d_y) @test y ≈ h_y end @testset "trsv" begin sA = rand(T,m,m) sA = sA + transpose(sA) A = triu(sA) dA = oneArray(A) x = rand(T, m) dx = oneArray(x) d_y = oneMKL.trsv('U','N','N',dA,dx) y = A\x # compare h_y = Array(d_y) @test y ≈ h_y end @testset "trsv (adjoint)" begin sA = rand(T,m,m) sA = sA + transpose(sA) A = triu(sA) dA = oneArray(A) x = rand(T, m) dx = oneArray(x) d_y = oneMKL.trsv('U','C','N',dA,dx) y = adjoint(A)\x # compare h_y = Array(d_y) @test y ≈ h_y end @testset "trsv (transpose)" begin sA = rand(T,m,m) sA = sA + transpose(sA) A = triu(sA) dA = oneArray(A) x = rand(T, m) dx = oneArray(x) d_y = oneMKL.trsv('U','T','N',dA,dx) y = transpose(A)\x # compare h_y = Array(d_y) @test y ≈ h_y end end end @testset for T in intersect(eltypes, [ComplexF32, ComplexF64]) alpha = rand(T) beta = rand(T) @testset "hemv!" begin A = rand(T,m,n) dA = oneArray(A) sA = rand(T,m,m) sA = sA + transpose(sA) dsA = oneArray(sA) hA = rand(T,m,m) hA = hA + hA' dhA = oneArray(hA) x = rand(T,m) dx = oneArray(x) y = rand(T,m) dy = oneArray(y) synchronize() # execute on host BLAS.hemv!('U',alpha,hA,x,beta,y) # execute on device oneMKL.hemv!('U',alpha,dhA,dx,beta,dy) # compare results hy = Array(dy) @test y ≈ hy end @testset "hemv" begin A = rand(T,m,n) dA = oneArray(A) sA = rand(T,m,m) sA = sA + transpose(sA) dsA = oneArray(sA) hA = rand(T,m,m) hA = hA + hA' dhA = oneArray(hA) x = rand(T,m) dx = oneArray(x) y = rand(T,m) dy = oneArray(y) synchronize() y = BLAS.hemv('U',hA,x) # execute on device dy = oneMKL.hemv('U',dhA,dx) # compare results hy = Array(dy) @test y ≈ hy end @testset "her!" 
begin A = rand(T,m,n) dA = oneArray(A) sA = rand(T,m,m) sA = sA + transpose(sA) dsA = oneArray(sA) hA = rand(T,m,m) hA = hA + hA' dhA = oneArray(hA) x = rand(T,m) dx = oneArray(x) dB = copy(dhA) # perform rank one update oneMKL.her!('U',real(alpha),dx,dB) B = (real(alpha)*x)*x' + hA # move to host and compare upper triangles hB = Array(dB) B = triu(B) hB = triu(hB) @test B ≈ hB end @testset "her2!" begin A = rand(T,m,n) dA = oneArray(A) sA = rand(T,m,m) sA = sA + transpose(sA) dsA = oneArray(sA) hA = rand(T,m,m) hA = hA + hA' dhA = oneArray(hA) x = rand(T,m) dx = oneArray(x) y = rand(T,m) dy = oneArray(y) dB = copy(dhA) oneMKL.her2!('U',real(alpha),dx,dy,dB) B = (real(alpha)*x)*y' + y*(real(alpha)*x)' + hA # move to host and compare upper triangles hB = Array(dB) B = triu(B) hB = triu(hB) @test B ≈ hB end end @testset "symmetric" begin @testset for T in intersect(eltypes, [Float32, Float64]) alpha = rand(T) beta = rand(T) A = rand(T,m,m) A = A + A' nbands = 3 @test m >= 1+nbands A = bandex(A,nbands,nbands) # convert to 'upper' banded storage format AB = band(A,0,nbands) # construct x x = rand(T,m) d_AB = oneArray(AB) d_x = oneArray(x) @testset "symv tests" begin x = rand(T,m) sA = rand(T, m, m) sA = sA + transpose(sA) dsA = oneArray(sA) dx = oneArray(x) synchronize() @testset "symv!" begin # generate vectors y = rand(T,m) # copy to device dy = oneArray(y) synchronize() # execute on host BLAS.symv!('U',alpha,sA,x,beta,y) # execute on device oneMKL.symv!('U',alpha,dsA,dx,beta,dy) # compare results hy = Array(dy) @test y ≈ hy end @testset "symv" begin y = BLAS.symv('U',sA,x) # execute on device dy = oneMKL.symv('U',dsA,dx) # compare results hy = Array(dy) @test y ≈ hy end end @testset "syr!" 
begin x = rand(T,m) sA = rand(T, m, m) sA = sA + transpose(sA) dsA = oneArray(sA) dx = oneArray(x) dB = copy(dsA) oneMKL.syr!('U',alpha,dx,dB) B = (alpha*x)*transpose(x) + sA # move to host and compare upper triangles hB = Array(dB) B = triu(B) hB = triu(hB) @test B ≈ hB end end end end @testset "level 3" begin @testset for T in intersect(eltypes, [Float32, Float64, ComplexF32, ComplexF64]) alpha = rand(T) beta = rand(T) B = rand(T,m,n) C = rand(T,m,n) Bbad = rand(T,m+1,n+1) d_B = oneArray(B) d_C = oneArray(C) d_Bbad = oneArray(Bbad) sA = rand(T,m,m) sA = sA + transpose(sA) dsA = oneArray(sA) @testset "symm!" begin oneMKL.symm!('L','U',alpha,dsA,d_B,beta,d_C) C = (alpha*sA)*B + beta*C # compare h_C = Array(d_C) @test C ≈ h_C @test_throws DimensionMismatch oneMKL.symm!('L','U',alpha,dsA,d_Bbad,beta,d_C) end @testset "symm" begin d_C = oneMKL.symm('L','U',dsA,d_B) C = sA*B # compare h_C = Array(d_C) @test C ≈ h_C @test_throws DimensionMismatch oneMKL.symm('L','U',dsA,d_Bbad) end @testset "syrk" begin A = rand(T,m,k) d_A = oneArray(A) d_C = oneMKL.syrk('U','N',d_A) C = A*transpose(A) C = triu(C) # move to host and compare h_C = Array(d_C) h_C = triu(C) @test C ≈ h_C end A = rand(T,m,k) B = rand(T,m,k) Bbad = rand(T,m+1,k+1) C = rand(T,m,m) C = C + transpose(C) # move to device d_A = oneArray(A) d_B = oneArray(B) d_Bbad = oneArray(Bbad) d_C = oneArray(C) @testset "syr2k!" begin # compute C = alpha*(A*transpose(B) + B*transpose(A)) + beta*C oneMKL.syr2k!('U','N',alpha,d_A,d_B,beta,d_C) # move back to host and compare C = triu(C) h_C = Array(d_C) h_C = triu(h_C) @test C ≈ h_C @test_throws DimensionMismatch oneMKL.syr2k!('U','N',alpha,d_A,d_Bbad,beta,d_C) end @testset "syr2k" begin C = alpha*(A*transpose(B) + B*transpose(A)) d_C = oneMKL.syr2k('U','N',alpha,d_A,d_B) # move back to host and compare C = triu(C) h_C = Array(d_C) h_C = triu(h_C) @test C ≈ h_C end @testset "trmm!" 
begin A = triu(rand(T, m, m)) B = rand(T,m,n) dA = oneArray(A) dB = oneArray(B) C = alpha*A*B oneMKL.trmm!('L','U','N','N',alpha,dA,dB) # move to host and compare h_C = Array(dB) @test C ≈ h_C end @testset "trmm" begin A = triu(rand(T, m, m)) B = rand(T,m,n) dA = oneArray(A) dB = oneArray(B) C = alpha*A*B dC = oneMKL.trmm('L','U','N','N',alpha,dA,dB) # move to host and compare h_C = Array(dC) @test C ≈ h_C end @testset "left trsm!" begin A = triu(rand(T, m, m)) B = rand(T,m,n) dA = oneArray(A) dB = oneArray(B) C = alpha*(A\B) dC = copy(dB) oneMKL.trsm!('L','U','N','N',alpha,dA,dC) @test C ≈ Array(dC) end @testset "left trsm" begin A = triu(rand(T, m, m)) B = rand(T,m,n) dA = oneArray(A) dB = oneArray(B) C = alpha*(A\B) dC = oneMKL.trsm('L','U','N','N',alpha,dA,dB) @test C ≈ Array(dC) end @testset "left trsm (adjoint)" begin A = triu(rand(T, m, m)) B = rand(T,m,n) dA = oneArray(A) dB = oneArray(B) C = alpha*(adjoint(A)\B) dC = oneMKL.trsm('L','U','C','N',alpha,dA,dB) @test C ≈ Array(dC) end @testset "left trsm (transpose)" begin A = triu(rand(T, m, m)) B = rand(T,m,n) dA = oneArray(A) dB = oneArray(B) C = alpha*(transpose(A)\B) dC = oneMKL.trsm('L','U','T','N',alpha,dA,dB) @test C ≈ Array(dC) end let A = rand(T, m,m), B = triu(rand(T, m, m)), alpha = rand(T) dA = oneArray(A) dB = oneArray(B) @testset "right trsm!" begin C = alpha*(A/B) dC = copy(dA) oneMKL.trsm!('R','U','N','N',alpha,dB,dC) @test C ≈ Array(dC) end @testset "right trsm" begin C = alpha*(A/B) dC = oneMKL.trsm('R','U','N','N',alpha,dB,dA) @test C ≈ Array(dC) end @testset "right trsm (adjoint)" begin C = alpha*(A/adjoint(B)) dC = oneMKL.trsm('R','U','C','N',alpha,dB,dA) @test C ≈ Array(dC) end @testset "right trsm (transpose)" begin C = alpha*(A/transpose(B)) dC = oneMKL.trsm('R','U','T','N',alpha,dB,dA) @test C ≈ Array(dC) end end if T <:Union{ComplexF32,ComplexF64} @testset "hemm!" 
begin B = rand(T,m,n) C = rand(T,m,n) d_B = oneArray(B) d_C = oneArray(C) hA = rand(T,m,m) hA = hA + hA' dhA = oneArray(hA) # compute C = alpha*(hA*B) + beta*C oneMKL.hemm!('L','L',alpha,dhA,d_B,beta,d_C) # move to host and compare h_C = Array(d_C) @test C ≈ h_C end @testset "hemm" begin B = rand(T,m,n) C = rand(T,m,n) d_B = oneArray(B) d_C = oneArray(C) hA = rand(T,m,m) hA = hA + hA' dhA = oneArray(hA) C = hA*B d_C = oneMKL.hemm('L','U',dhA,d_B) # move to host and compare h_C = Array(d_C) @test C ≈ h_C end @testset "herk!" begin B = rand(T,m,n) C = rand(T,m,n) d_B = oneArray(B) d_C = oneArray(C) hA = rand(T,m,m) hA = hA + hA' dhA = oneArray(hA) A = rand(T,m,k) d_A = oneArray(A) d_C = oneArray(dhA) oneMKL.herk!('U','N',real(alpha),d_A,real(beta),d_C) C = real(alpha)*(A*A') + real(beta)*hA C = triu(C) # move to host and compare h_C = Array(d_C) h_C = triu(C) @test C ≈ h_C end @testset "herk" begin B = rand(T,m,n) C = rand(T,m,n) d_B = oneArray(B) d_C = oneArray(C) hA = rand(T,m,m) hA = hA + hA' dhA = oneArray(hA) A = rand(T,m,k) d_A = oneArray(A) d_C = oneMKL.herk('U','N',d_A) C = A*A' C = triu(C) # move to host and compare h_C = Array(d_C) h_C = triu(C) @test C ≈ h_C end @testset "her2k!" 
begin A = rand(T,m,k) B = rand(T,m,k) Bbad = rand(T,m+1,k+1) C = rand(T,m,m) C = C + transpose(C) # move to device d_A = oneArray(A) d_B = oneArray(B) d_Bbad = oneArray(Bbad) d_C = oneArray(C) elty1 = T elty2 = real(T) # generate parameters α = rand(elty1) β = rand(elty2) C = C + C' d_C = oneArray(C) C = α*(A*B') + conj(α)*(B*A') + β*C oneMKL.her2k!('U','N',α,d_A,d_B,β,d_C) # move back to host and compare C = triu(C) h_C = Array(d_C) h_C = triu(h_C) @test C ≈ h_C @test_throws DimensionMismatch oneMKL.her2k!('U','N',α,d_A,d_Bbad,β,d_C) end @testset "her2k" begin A = rand(T,m,k) B = rand(T,m,k) Bbad = rand(T,m+1,k+1) C = rand(T,m,m) C = C + transpose(C) # move to device d_A = oneArray(A) d_B = oneArray(B) d_Bbad = oneArray(Bbad) d_C = oneArray(C) C = A*B' + B*A' d_C = oneMKL.her2k('U','N',d_A,d_B) # move back to host and compare C = triu(C) h_C = Array(d_C) h_C = triu(h_C) @test C ≈ h_C end end end @testset for T in intersect(eltypes, [Float16, Float32, Float64, ComplexF32, ComplexF64]) @testset "gemm!" 
begin alpha = rand(T) beta = rand(T) A = rand(T,m,k) B = rand(T,k,n) Bbad = rand(T,k+1,n+1) C1 = rand(T,m,n) C2 = copy(C1) d_A = oneArray(A) d_B = oneArray(B) d_Bbad = oneArray(Bbad) d_C1 = oneArray(C1) d_C2 = oneArray(C2) hA = rand(T,m,m) hA = hA + hA' dhA = oneArray(hA) sA = rand(T,m,m) sA = sA + transpose(sA) dsA = oneArray(sA) oneMKL.gemm!('N','N',alpha,d_A,d_B,beta,d_C1) mul!(d_C2, d_A, d_B) h_C1 = Array(d_C1) h_C2 = Array(d_C2) C1 = (alpha*A)*B + beta*C1 C2 = A*B # compare @test C1 ≈ h_C1 @test C2 ≈ h_C2 @test_throws ArgumentError mul!(dhA, dhA, dsA) @test_throws DimensionMismatch mul!(d_C1, d_A, dsA) d_c = oneMKL.gemm('N', 'N', d_A, d_B) C = A * B C2 = d_A * d_B h_C = Array(d_c) h_C2 = Array(C2) @test C ≈ h_C @test C ≈ h_C2 end end end @testset "Batch Primitives" begin @testset for T in intersect(eltypes, [Float16, Float32, Float64, ComplexF32, ComplexF64]) alpha = rand(T) beta = rand(T) group_count = 10 @testset "Gemm Batch" begin # generate matrices bA = [rand(T,m,k) for i in 1:group_count] bB = [rand(T,k,n) for i in 1:group_count] bC = [rand(T,m,n) for i in 1:group_count] # move to device bd_A = oneArray{T, 2}[] bd_B = oneArray{T, 2}[] bd_C = oneArray{T, 2}[] bd_bad = oneArray{T, 2}[] for i in 1:length(bA) push!(bd_A,oneArray(bA[i])) push!(bd_B,oneArray(bB[i])) push!(bd_C,oneArray(bC[i])) if i < length(bA) - 2 push!(bd_bad,oneArray(bC[i])) end end @testset "gemm_batched!" 
begin
    # C = (alpha*A)*B + beta*C
    oneMKL.gemm_batched!('N','N',alpha,bd_A,bd_B,beta,bd_C)
    for i in 1:length(bd_C)
        bC[i] = (alpha*bA[i])*bB[i] + beta*bC[i]
        h_C = Array(bd_C[i])
        #compare
        @test bC[i] ≈ h_C
    end
    @test_throws DimensionMismatch oneMKL.gemm_batched!('N','N',alpha,bd_A,bd_bad,beta,bd_C)
end
@testset "gemm_batched" begin
    bd_C = oneMKL.gemm_batched('N','N',bd_A,bd_B)
    for i in 1:length(bA)
        bC = bA[i]*bB[i]
        h_C = Array(bd_C[i])
        @test bC ≈ h_C
    end
    @test_throws DimensionMismatch oneMKL.gemm_batched('N','N',alpha,bd_A,bd_bad)
end
end
if T <:Union{Float32, Float64, ComplexF32, ComplexF64}
    @testset "Trsm Batch" begin
        @testset "trsm_batched!" begin
            # generate upper-triangular systems and right-hand sides
            bA = [rand(T,m,m) for i in 1:group_count]
            map!((x) -> triu(x), bA, bA)
            bB = [rand(T,m,n) for i in 1:group_count]
            bBbad = [rand(T,m,n) for i in 1:(group_count-1)]
            # move to device
            bd_A = oneArray{T, 2}[]
            bd_B = oneArray{T, 2}[]
            bd_Bbad = oneArray{T, 2}[]
            for i in 1:length(bA)
                push!(bd_A,oneArray(bA[i]))
                push!(bd_B,oneArray(bB[i]))
            end
            for i in 1:length(bBbad)
                push!(bd_Bbad,oneArray(bBbad[i]))
            end
            # compute
            oneMKL.trsm_batched!('L','U','N','N',alpha,bd_A,bd_B)
            @test_throws DimensionMismatch oneMKL.trsm_batched!('L','U','N','N',alpha,bd_A,bd_Bbad)
            # move to host and compare
            for i in 1:length(bd_B)
                bC = alpha*(bA[i]\bB[i])
                h_C = Array(bd_B[i])
                #compare
                @test bC ≈ h_C
            end
        end
        @testset "trsm_batched" begin
            # generate parameter
            # NOTE(fix): this used to be `rand(elty)`, but `elty` is not defined in this
            # scope — the enclosing `@testset for` loop variable is `T` — so evaluating
            # this branch raised an UndefVarError. Use the loop's element type instead.
            alpha = rand(T)
            # generate matrices
            bA = [rand(T,m,m) for i in 1:group_count]
            map!((x) -> triu(x), bA, bA)
            bB = [rand(T,m,n) for i in 1:group_count]
            # move to device
            bd_A = oneArray{T, 2}[]
            bd_B = oneArray{T, 2}[]
            for i in 1:length(bA)
                push!(bd_A,oneArray(bA[i]))
                push!(bd_B,oneArray(bB[i]))
            end
            # compute
            bd_C = oneMKL.trsm_batched('L','U','N','N',alpha,bd_A,bd_B)
            # move to host and compare
            for i in 1:length(bd_C)
                bC = alpha*(bA[i]\bB[i])
                h_C = Array(bd_C[i])
                @test bC ≈ h_C
            end
        end
    end
end
end
end
@testset "gemm_batch_strided" begin
@testset for elty in intersect(eltypes, [Float16, Float32, Float64,
ComplexF32, ComplexF64]) nbatch = 10 alpha = rand(elty) beta = rand(elty) @testset "gemm_strided_batched!" begin bA = rand(elty, m, k, nbatch) bB = rand(elty, k, n, nbatch) bC = rand(elty, m, n, nbatch) bbad = rand(elty, m+1, n+1, nbatch) # move to device bd_A = oneArray{elty, 3}(bA) bd_B = oneArray{elty, 3}(bB) bd_C = oneArray{elty, 3}(bC) bd_bad = oneArray{elty, 3}(bbad) oneMKL.gemm_strided_batched!('N', 'N', alpha, bd_A, bd_B, beta, bd_C) for i in 1:nbatch bC[:, :, i] = (alpha * bA[:, :, i]) * bB[:, :, i] + beta * bC[:, :, i] end h_C = Array(bd_C) @test bC ≈ h_C @test_throws DimensionMismatch oneMKL.gemm_strided_batched!('N', 'N', alpha, bd_A, bd_B, beta, bd_bad) end @testset "gemm_strided_batched" begin # Host buffers bA = rand(elty, m, k, nbatch) bB = rand(elty, k, n, nbatch) bC = rand(elty, m, n, nbatch) bbad = rand(elty, m+1, n+1, nbatch) # Move host data to device bd_A = oneArray{elty, 3}(bA) bd_B = oneArray{elty, 3}(bB) bd_C = oneArray{elty, 3}(bC) bd_bad = oneArray{elty, 3}(bbad) # Compute oneMKL strided batch bd_C = oneMKL.gemm_strided_batched('N', 'N', bd_A, bd_B) #Compute Host for i in 1:nbatch bC[:, :, i] = bA[:, :, i] * bB[:, :, i] end h_C = Array(bd_C) @test bC ≈ h_C # generate matrices bA = rand(elty, k, m, nbatch) bB = rand(elty, k, n, nbatch) bC = zeros(elty, m, n, nbatch) # move to device bd_A = oneArray{elty, 3}(bA) bd_B = oneArray{elty, 3}(bB) bd_C = oneMKL.gemm_strided_batched('T', 'N', bd_A, bd_B) for i in 1:nbatch bC[:, :, i] = transpose(bA[:, :, i]) * bB[:, :, i] end h_C = Array(bd_C) @test bC ≈ h_C @test_throws DimensionMismatch oneMKL.gemm_strided_batched('N', 'N', alpha, bd_A, bd_bad) end end end @testset "SPARSE" begin @testset "$T" for T in intersect(eltypes, [Float32, Float64, ComplexF32, ComplexF64]) @testset "oneSparseMatrixCSR" begin for S in (Int32, Int64) A = sprand(T, 20, 10, 0.5) A = SparseMatrixCSC{T, S}(A) B = oneSparseMatrixCSR(A) A2 = SparseMatrixCSC(B) @test A == A2 C = oneSparseMatrixCSR(B.rowPtr, B.colVal, B.nzVal, 
size(B))
A3 = SparseMatrixCSC(C)
@test A == A3
D = oneSparseMatrixCSR(oneVector(S[]), oneVector(S[]), oneVector(T[]), (0, 0)) # empty matrix
end
end
@testset "oneSparseMatrixCSC" begin
    # NOTE(fix): this testset used to begin with `(T isa Complex) && continue`,
    # intended to skip complex eltypes. That guard was dead code: `T` is a *type*,
    # so `T isa Complex` is always `false`, and the complex round-trip tests below
    # have therefore always run. The no-op guard has been removed; use `T <: Complex`
    # if skipping ever becomes necessary.
    for S in (Int32, Int64)
        A = sprand(T, 20, 10, 0.5)
        A = SparseMatrixCSC{T, S}(A)
        B = oneSparseMatrixCSC(A)
        A2 = SparseMatrixCSC(B)
        @test A == A2
        C = oneSparseMatrixCSC(A.colptr |> oneVector, A.rowval |> oneVector, A.nzval |> oneVector, size(A))
        A3 = SparseMatrixCSC(C)
        @test A == A3
        D = oneSparseMatrixCSC(oneVector(S[]), oneVector(S[]), oneVector(T[]), (0, 0)) # empty matrix
    end
end
@testset "oneSparseMatrixCOO" begin
    # round-trip host CSC -> device COO -> host CSC
    for S in (Int32, Int64)
        A = sprand(T, 20, 10, 0.5)
        A = SparseMatrixCSC{T, S}(A)
        B = oneSparseMatrixCOO(A)
        A2 = SparseMatrixCSC(B)
        @test A == A2
    end
end
@testset "sparse gemv" begin
    @testset "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCOO, oneSparseMatrixCSR, oneSparseMatrixCSC)
        @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)]
            A = sprand(T, 20, 10, 0.5)
            # x/y swap shapes when A is (conjugate-)transposed
            x = transa == 'N' ? rand(T, 10) : rand(T, 20)
            y = transa == 'N' ? rand(T, 20) : rand(T, 10)
            dA = SparseMatrix(A)
            dx = oneVector{T}(x)
            dy = oneVector{T}(y)
            alpha = rand(T)
            beta = rand(T)
            oneMKL.sparse_optimize_gemv!(transa, dA)
            oneMKL.sparse_gemv!(transa, alpha, dA, dx, beta, dy)
            @test alpha * opa(A) * x + beta * y ≈ collect(dy)
        end
    end
end
@testset "sparse gemm" begin
    @testset "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR, oneSparseMatrixCSC)
        @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)]
            @testset "transb = $transb" for (transb, opb) in [('N', identity), ('T', transpose), ('C', adjoint)]
                (transb == 'N') || continue
                A = sprand(T, 10, 10, 0.5)
                B = transb == 'N' ?
rand(T, 10, 2) : rand(T, 2, 10) C = rand(T, 10, 2) dA = SparseMatrix(A) dB = oneMatrix{T}(B) dC = oneMatrix{T}(C) alpha = rand(T) beta = rand(T) oneMKL.sparse_optimize_gemm!(transa, dA) oneMKL.sparse_gemm!(transa, transb, alpha, dA, dB, beta, dC) @test alpha * opa(A) * opb(B) + beta * C ≈ collect(dC) oneMKL.sparse_optimize_gemm!(transa, transb, 2, dA) end end end end @testset "sparse symv" begin @testset "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR, oneSparseMatrixCSC) @testset "uplo = $uplo" for uplo in ('L', 'U') A = sprand(T, 10, 10, 0.5) A = A + transpose(A) x = rand(T, 10) y = rand(T, 10) dA = uplo == 'L' ? SparseMatrix(A |> tril) : SparseMatrix(A |> triu) dx = oneVector{T}(x) dy = oneVector{T}(y) alpha = rand(T) beta = rand(T) oneMKL.sparse_symv!(uplo, alpha, dA, dx, beta, dy) @test alpha * A * x + beta * y ≈ collect(dy) end end end @testset "sparse trmv" begin @testset "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR, oneSparseMatrixCSC) @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)] for (uplo, diag, wrapper) in [ ('L', 'N', LowerTriangular), ('L', 'U', UnitLowerTriangular), ('U', 'N', UpperTriangular), ('U', 'U', UnitUpperTriangular), ] (transa == 'N') || continue A = sprand(T, 10, 10, 0.5) x = rand(T, 10) y = rand(T, 10) B = uplo == 'L' ? tril(A) : triu(A) B = diag == 'U' ? 
B - Diagonal(B) + I : B dA = SparseMatrix(B) dx = oneVector{T}(x) dy = oneVector{T}(y) alpha = rand(T) beta = rand(T) if SparseMatrix == oneSparseMatrixCSC @test_broken sparse_optimize_trmv!(uplo, transa, diag, dA) # Intel oneAPI limitation: CSC triangular operations not supported @test_throws ArgumentError oneMKL.sparse_optimize_trmv!(uplo, transa, diag, dA) @test_throws ArgumentError oneMKL.sparse_trmv!(uplo, transa, diag, alpha, dA, dx, beta, dy) else oneMKL.sparse_optimize_trmv!(uplo, transa, diag, dA) oneMKL.sparse_trmv!(uplo, transa, diag, alpha, dA, dx, beta, dy) @test alpha * wrapper(opa(A)) * x + beta * y ≈ collect(dy) end end end end end @testset "sparse trsv" begin @testset "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR, oneSparseMatrixCSC) @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)] for (uplo, diag, wrapper) in [('L', 'N', LowerTriangular), ('L', 'U', UnitLowerTriangular), ('U', 'N', UpperTriangular), ('U', 'U', UnitUpperTriangular), ] (transa == 'N') || continue alpha = rand(T) A = rand(T, 10, 10) + I A = sparse(A) x = rand(T, 10) y = rand(T, 10) B = uplo == 'L' ? tril(A) : triu(A) B = diag == 'U' ? 
B - Diagonal(B) + I : B dA = SparseMatrix(B) dx = oneVector{T}(x) dy = oneVector{T}(y) if SparseMatrix == oneSparseMatrixCSC @test_broken sparse_optimize_trsv!(uplo, transa, diag, dA) # Intel oneAPI limitation: CSC triangular operations not supported @test_throws ArgumentError oneMKL.sparse_optimize_trsv!(uplo, transa, diag, dA) @test_throws ArgumentError oneMKL.sparse_trsv!(uplo, transa, diag, alpha, dA, dx, dy) else oneMKL.sparse_optimize_trsv!(uplo, transa, diag, dA) oneMKL.sparse_trsv!(uplo, transa, diag, alpha, dA, dx, dy) y = wrapper(opa(A)) \ (alpha * x) @test y ≈ collect(dy) end end end end end @testset "sparse trsm" begin @testset "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR, oneSparseMatrixCSC) @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)] @testset "transx = $transx" for (transx, opx) in [('N', identity), ('T', transpose), ('C', adjoint)] (transx != 'N') && continue for (uplo, diag, wrapper) in [ ('L', 'N', LowerTriangular), ('L', 'U', UnitLowerTriangular), ('U', 'N', UpperTriangular), ('U', 'U', UnitUpperTriangular), ] (transa == 'N') || continue alpha = rand(T) A = rand(T, 10, 10) + I A = sparse(A) X = transx == 'N' ? rand(T, 10, 4) : rand(T, 4, 10) Y = rand(T, 10, 4) B = uplo == 'L' ? tril(A) : triu(A) B = diag == 'U' ? 
B - Diagonal(B) + I : B dA = SparseMatrix(B) dX = oneMatrix{T}(X) dY = oneMatrix{T}(Y) if SparseMatrix == oneSparseMatrixCSC @test_broken sparse_optimize_trsm!(uplo, transa, diag, dA) # Intel oneAPI limitation: CSC triangular operations not supported @test_throws ArgumentError oneMKL.sparse_optimize_trsm!(uplo, transa, diag, dA) @test_throws ArgumentError oneMKL.sparse_trsm!(uplo, transa, transx, diag, alpha, dA, dX, dY) @test_throws ArgumentError oneMKL.sparse_optimize_trsm!(uplo, transa, diag, 4, dA) else oneMKL.sparse_optimize_trsm!(uplo, transa, diag, dA) oneMKL.sparse_trsm!(uplo, transa, transx, diag, alpha, dA, dX, dY) Y = wrapper(opa(A)) \ (alpha * opx(X)) @test Y ≈ collect(dY) oneMKL.sparse_optimize_trsm!(uplo, transa, diag, 4, dA) end end end end end end end end @testset "LAPACK" begin @testset "$elty" for elty in intersect(eltypes, [Float32, Float64, ComplexF32, ComplexF64]) m = 15 n = 10 p = 5 @testset "geqrf!" begin A = rand(elty, m, n) d_A = oneArray(A) d_A, tau = oneMKL.geqrf!(d_A) tau_c = zeros(elty, n) LinearAlgebra.LAPACK.geqrf!(A, tau_c) @test tau_c ≈ Array(tau) end @testset "geqrf! -- orgqr!" begin A = rand(elty, m, n) dA = oneArray(A) dA, τ = oneMKL.geqrf!(dA) oneMKL.orgqr!(dA, τ) @test dA' * dA ≈ I end @testset "ormqr!" begin @testset "side = $side" for side in ['L', 'R'] @testset "trans = $trans" for (trans, op) in [('N', identity), ('T', transpose), ('C', adjoint)] (trans == 'T') && (elty <: Complex) && continue A = rand(elty, m, n) dA = oneArray(A) dA, dτ = oneMKL.geqrf!(dA) hI = Matrix{elty}(I, m, m) dI = oneArray(hI) dH = oneMKL.ormqr!(side, 'N', dA, dτ, dI) @test dH' * dH ≈ I C = side == 'L' ? rand(elty, m, n) : rand(elty, n, m) dC = oneArray(C) dD = side == 'L' ? op(dH) * dC : dC * op(dH) oneMKL.ormqr!(side, trans, dA, dτ, dC) @test dC ≈ dD end end end @testset "potrf! -- potrs!" 
begin
    # build a Hermitian positive-definite system
    A = rand(elty,n,n)
    A = A*A' + I
    B = rand(elty,n,p)
    d_A = oneArray(A)
    d_B = oneArray(B)
    # NOTE(fix): this test used to factor with 'L' but then call `potrs!('U', ...)`,
    # on both the device and the host side. `potrs` must be told which triangle holds
    # the factorization from `potrf`, so the old test merely compared two identical
    # mismatched solves. Using 'L' consistently makes it exercise a valid solve.
    oneMKL.potrf!('L',d_A)
    oneMKL.potrs!('L',d_A,d_B)
    LAPACK.potrf!('L',A)
    LAPACK.potrs!('L',A,B)
    @test B ≈ collect(d_B)
end
# @testset "sytrf!" begin
#     A = rand(elty,n,n)
#     A = A + A'
#     d_A = oneArray(A)
#     d_A, d_ipiv = oneMKL.sytrf!('U',d_A)
#     h_A = collect(d_A)
#     h_ipiv = collect(d_ipiv)
#     A, ipiv = LAPACK.sytrf!('U',A)
#     @test ipiv == h_ipiv
#     @test A ≈ h_A
# end
@testset "getrf! -- getri!" begin
    # LU factorization and explicit inverse, compared against host LAPACK
    A = rand(elty, m, m)
    d_A = oneArray(A)
    d_A, d_ipiv = oneMKL.getrf!(d_A)
    h_A, ipiv = LAPACK.getrf!(A)
    @test h_A ≈ Array(d_A)
    d_A = oneMKL.getri!(d_A, d_ipiv)
    h_A = LAPACK.getri!(h_A, ipiv)
    @test h_A ≈ Array(d_A)
end
@testset "getrf_batched! -- getri_batched!" begin
    bA = [rand(elty, m, m) for i in 1:p]
    d_bA = oneMatrix{elty}[]
    for i in 1:p
        push!(d_bA, oneMatrix(bA[i]))
    end
    # batched LU, checked element-wise against per-matrix host LAPACK
    d_ipiv, d_bA = oneMKL.getrf_batched!(d_bA)
    h_bA = [collect(d_bA[i]) for i in 1:p]
    ipiv = Vector{Int64}[]
    for i = 1:p
        _, ipiv_i, info = LAPACK.getrf!(bA[i])
        push!(ipiv, ipiv_i)
        @test bA[i] ≈ h_bA[i]
    end
    # batched inversion from the LU factors
    d_ipiv, d_bA = oneMKL.getri_batched!(d_bA, d_ipiv)
    h_bA = [collect(d_bA[i]) for i in 1:p]
    for i = 1:p
        LAPACK.getri!(bA[i], ipiv[i])
        @test bA[i] ≈ h_bA[i]
    end
end
# @testset "getrs_batched!" begin
#     bA = [rand(elty, m, m) for i in 1:p]
#     bB = [rand(elty, m, n) for i in 1:p]
#     d_bA = oneMatrix{elty}[]
#     d_bB = oneMatrix{elty}[]
#     for i in 1:p
#         push!(d_bA, oneMatrix(bA[i]))
#         push!(d_bB, oneMatrix(bB[i]))
#     end
#     d_ipiv, d_bA = oneMKL.getrf_batched!(d_bA)
#     d_bX = oneMKL.getrs_batched!(d_bA, d_ipiv, d_bB)
#     h_bX = [collect(d_bX[i]) for i in 1:p]
#     for i = 1:p
#         @test bA[i] * hbX[i] ≈ bB[i]
#     end
# end
@testset "potrf_batched! -- potrs_batched!"
begin A = [rand(elty,n,n) for i = 1:p] A = [A[i]' * A[i] + I for i = 1:p] B = [rand(elty,n,p) for i = 1:p] d_A = oneMatrix{elty}[] d_B = oneMatrix{elty}[] for i in 1:p push!(d_A, oneMatrix(A[i])) push!(d_B, oneMatrix(B[i])) end oneMKL.potrf_batched!(d_A) oneMKL.potrs_batched!(d_A, d_B) for i = 1:p LAPACK.potrf!('L', A[i]) LAPACK.potrs!('L', A[i], B[i]) @test B[i] ≈ collect(d_B[i]) end end @testset "geqrf_batched! -- -- orgqr_batched!" begin A = [rand(elty,m,n) for i in 1:p] d_A = oneMatrix{elty}[] for i in 1:p push!(d_A, oneMatrix(A[i])) end d_tau, d_A = oneMKL.geqrf_batched!(d_A) oneMKL.orgqr_batched!(d_A, d_tau) for d_Ai in d_A @test d_Ai' * d_Ai ≈ I end end @testset "gebrd!" begin A = rand(elty,m,n) d_A = oneArray(A) d_A, d_D, d_E, d_tauq, d_taup = oneMKL.gebrd!(d_A) h_A = collect(d_A) h_D = collect(d_D) h_E = collect(d_E) h_tauq = collect(d_tauq) h_taup = collect(d_taup) A,d,e,q,p = LAPACK.gebrd!(A) @test A ≈ h_A @test d ≈ h_D @test e[min(m,n)-1] ≈ h_E[min(m,n)-1] @test q ≈ h_tauq @test p ≈ h_taup end @testset "gesvd!" begin A = rand(elty,m,n) d_A = oneMatrix(A) U, Σ, Vt = oneMKL.gesvd!('A', 'A', d_A) @test A ≈ collect(U[:,1:n] * Diagonal(Σ) * Vt) for jobu in ('A', 'S', 'N', 'O') for jobvt in ('A', 'S', 'N', 'O') (jobu == 'A') && (jobvt == 'A') && continue (jobu == 'O') && (jobvt == 'O') && continue d_A = oneMatrix(A) U2, Σ2, Vt2 = oneMKL.gesvd!(jobu, jobvt, d_A) @test Σ ≈ Σ2 end end end @testset "syevd! -- heevd!" begin @testset "uplo = $uplo" for uplo in ('L', 'U') A = rand(elty,n,n) B = A + A' A = uplo == 'L' ? tril(B) : triu(B) d_A = oneMatrix(A) W, V = elty <: Real ? oneMKL.syevd!('V', uplo, d_A) : oneMKL.heevd!('V', uplo, d_A) @test B ≈ collect(V * Diagonal(W) * V') d_A = oneMatrix(A) d_W = elty <: Real ? oneMKL.syevd!('N', uplo, d_A) : oneMKL.heevd!('N', uplo, d_A) end end @testset "sygvd! -- hegvd!" begin A = rand(elty,m,m) B = rand(elty,m,m) A = A*A' + I B = B*B' + I d_A = oneArray(A) d_B = oneArray(B) d_W, d_VA, d_VB = elty <: Real ? 
oneMKL.sygvd!(1, 'V','U', d_A, d_B) : oneMKL.hegvd!(1, 'V','U', d_A, d_B) h_W = collect(d_W) h_VA = collect(d_VA) h_VB = collect(d_VB) Eig = eigen(Hermitian(A), Hermitian(B)) @test Eig.values ≈ h_W @test A * h_VA ≈ B * h_VA * Diagonal(h_W) rtol=1e-4 @test h_VA' * B * h_VA ≈ I end end end end # oneMKL tests ================================================ FILE: test/pointer.jl ================================================ using oneAPI.oneL0 # constructors voidptr_a = ZePtr{Cvoid}(Int(0xDEADBEEF)) @test reinterpret(Ptr{Cvoid}, voidptr_a) == Ptr{Cvoid}(Int(0xDEADBEEF)) # getters @test eltype(voidptr_a) == Cvoid # comparisons voidptr_b = ZePtr{Cvoid}(Int(0xCAFEBABE)) @test voidptr_a != voidptr_b @testset "conversions" begin # between host and device pointers @test_throws ArgumentError convert(Ptr{Cvoid}, voidptr_a) # between device pointers intptr_a = ZePtr{Int}(Int(0xDEADBEEF)) @test convert(typeof(intptr_a), voidptr_a) == intptr_a # convert back and forth from UInt intptr_b = ZePtr{Int}(Int(0xDEADBEEF)) @test convert(UInt, intptr_b) == 0xDEADBEEF @test convert(ZePtr{Int}, Int(0xDEADBEEF)) == intptr_b @test Int(intptr_b) == Int(0xDEADBEEF) # pointer arithmetic intptr_c = ZePtr{Int}(Int(0xDEADBEEF)) intptr_d = 2 + intptr_c @test isless(intptr_c, intptr_d) @test intptr_d - intptr_c == 2 @test intptr_d - 2 == intptr_c end @testset "GPU or CPU integration" begin a = [1] ccall(:clock, Nothing, (Ptr{Int},), a) @test_throws Exception ccall(:clock, Nothing, (ZePtr{Int},), a) ccall(:clock, Nothing, (PtrOrZePtr{Int},), a) b = oneArray{eltype(a), ndims(a)}(undef, size(a)) ccall(:clock, Nothing, (ZePtr{Int},), b) @test_throws Exception ccall(:clock, Nothing, (Ptr{Int},), b) ccall(:clock, Nothing, (PtrOrZePtr{Int},), b) end @testset "reference values" begin # Ref @test typeof(Base.cconvert(Ref{Int}, 1)) == typeof(Ref(1)) @test Base.unsafe_convert(Ref{Int}, Base.cconvert(Ref{Int}, 1)) isa Ptr{Int} ptr = reinterpret(Ptr{Int}, C_NULL) @test Base.cconvert(Ref{Int}, ptr) == ptr 
@test Base.unsafe_convert(Ref{Int}, Base.cconvert(Ref{Int}, ptr)) == ptr arr = [1] @test Base.cconvert(Ref{Int}, arr) isa Base.RefArray{Int, typeof(arr)} @test Base.unsafe_convert(Ref{Int}, Base.cconvert(Ref{Int}, arr)) == pointer(arr) # ZeRef @test typeof(Base.cconvert(ZeRef{Int}, 1)) == typeof(ZeRef(1)) @test Base.unsafe_convert(ZeRef{Int}, Base.cconvert(ZeRef{Int}, 1)) isa ZeRef{Int} zeptr = reinterpret(ZePtr{Int}, C_NULL) @test Base.cconvert(ZeRef{Int}, zeptr) == zeptr @test Base.unsafe_convert(ZeRef{Int}, Base.cconvert(ZeRef{Int}, zeptr)) == Base.bitcast(ZeRef{Int}, zeptr) zearr = oneAPI.oneArray([1]) @test Base.cconvert(ZeRef{Int}, zearr) isa oneL0.ZeRefArray{Int, typeof(zearr)} @test Base.unsafe_convert(ZeRef{Int}, Base.cconvert(ZeRef{Int}, zearr)) == Base.bitcast(ZeRef{Int}, pointer(zearr)) # RefOrZeRef @test typeof(Base.cconvert(RefOrZeRef{Int}, 1)) == typeof(Ref(1)) @test Base.unsafe_convert(RefOrZeRef{Int}, Base.cconvert(RefOrZeRef{Int}, 1)) isa RefOrZeRef{Int} @test Base.cconvert(RefOrZeRef{Int}, ptr) == ptr @test Base.unsafe_convert(RefOrZeRef{Int}, Base.cconvert(RefOrZeRef{Int}, ptr)) == Base.bitcast(RefOrZeRef{Int}, ptr) @test Base.cconvert(RefOrZeRef{Int}, zeptr) == zeptr @test Base.unsafe_convert(RefOrZeRef{Int}, Base.cconvert(RefOrZeRef{Int}, zeptr)) == Base.bitcast(RefOrZeRef{Int}, zeptr) @test Base.cconvert(RefOrZeRef{Int}, arr) isa Base.RefArray{Int, typeof(arr)} @test Base.unsafe_convert(RefOrZeRef{Int}, Base.cconvert(RefOrZeRef{Int}, arr)) == Base.bitcast(RefOrZeRef{Int}, pointer(arr)) @test Base.cconvert(RefOrZeRef{Int}, zearr) isa oneL0.ZeRefArray{Int, typeof(zearr)} @test Base.unsafe_convert(RefOrZeRef{Int}, Base.cconvert(RefOrZeRef{Int}, zearr)) == Base.bitcast(RefOrZeRef{Int}, pointer(zearr)) end ================================================ FILE: test/random.jl ================================================ using Random @testset "rand" begin # in-place for (f,T) in ((rand!,Float16), (rand!,Float32), (randn!,Float16), 
(randn!,Float32)), d in (2, (2,2), (2,2,2), 3, (3,3), (3,3,3)) A = oneArray{T}(undef, d) fill!(A, T(0)) f(A) @test !iszero(collect(A)) end # out-of-place, with implicit type for (f,T) in ((oneAPI.rand,Float32), (oneAPI.randn,Float32)), args in ((2,), (2, 2), (3,), (3, 3)) A = f(args...) @test eltype(A) == T end # out-of-place, with type specified for (f,T) in ((oneAPI.rand,Float32), (oneAPI.randn,Float32), (rand,Float32), (randn,Float32)), args in ((T, 2), (T, 2, 2), (T, (2, 2)), (T, 3), (T, 3, 3), (T, (3, 3))) A = f(args...) @test eltype(A) == T end ## seeding oneAPI.seed!(1) a = oneAPI.rand(Int32, 1) oneAPI.seed!(1) b = oneAPI.rand(Int32, 1) @test iszero(collect(a) - collect(b)) end # testset ================================================ FILE: test/runtests.jl ================================================ using Distributed using Dates import REPL using Printf: @sprintf using Base.Filesystem: path_separator # parse some command-line arguments function extract_flag!(args, flag, default=nothing) for f in args if startswith(f, flag) # Check if it's just `--flag` or if it's `--flag=foo` if f != flag val = split(f, '=')[2] if default !== nothing && !(typeof(default) <: AbstractString) val = parse(typeof(default), val) end else val = default end # Drop this value from our args filter!(x -> x != f, args) return (true, val) end end return (false, default) end do_help, _ = extract_flag!(ARGS, "--help") if do_help println(""" Usage: runtests.jl [--help] [--list] [--jobs=N] [TESTS...] --help Show this text. --list List all available tests. --quickfail Fail the entire run as soon as a single test errored. --jobs=N Launch `N` processes to perform tests (default: Sys.CPU_THREADS). 
Remaining arguments filter the tests that will be executed.""") exit(0) end _, jobs = extract_flag!(ARGS, "--jobs", Sys.CPU_THREADS) do_quickfail, _ = extract_flag!(ARGS, "--quickfail") include("setup.jl") # make sure everything is precompiled @info "System information:\n" * sprint(io->oneAPI.versioninfo(io)) if Sys.islinux() @info "Using oneAPI support library at " * oneAPI.Support.liboneapi_support end @info "Running $jobs tests in parallel. If this is too many, specify the `--jobs` argument to the tests, or set the JULIA_CPU_THREADS environment variable." # choose tests const tests = [] const test_runners = Dict() ## files in the test folder for (rootpath, dirs, files) in walkdir(@__DIR__) # find Julia files filter!(files) do file endswith(file, ".jl") && file !== "setup.jl" && file !== "runtests.jl" end isempty(files) && continue # strip extension files = map(files) do file file[1:end-3] end # prepend subdir subdir = relpath(rootpath, @__DIR__) if subdir != "." files = map(files) do file joinpath(subdir, file) end end # unify path separators files = map(files) do file replace(file, path_separator => '/') end append!(tests, files) for file in files test_runners[file] = ()->include("$(@__DIR__)/$file.jl") end end sort!(tests; by=(file)->stat("$(@__DIR__)/$file.jl").size, rev=true) ## GPUArrays testsuite for name in keys(TestSuite.tests) pushfirst!(tests, "gpuarrays/$name") test_runners["gpuarrays/$name"] = ()->TestSuite.tests[name](oneArray) end ## finalize unique!(tests) # parse some more command-line arguments ## --list to list all available tests do_list, _ = extract_flag!(ARGS, "--list") if do_list println("Available tests:") for test in sort(tests) println(" - $test") end exit(0) end ## no options should remain optlike_args = filter(startswith("-"), ARGS) if !isempty(optlike_args) error("Unknown test options `$(join(optlike_args, " "))` (try `--help` for usage instructions)") end ## the remaining args filter tests if !isempty(ARGS) filter!(tests) do test 
any(arg->startswith(test, arg), ARGS) end end # add workers const test_exeflags = Base.julia_cmd() filter!(test_exeflags.exec) do c return !(startswith(c, "--depwarn") || startswith(c, "--check-bounds")) end push!(test_exeflags.exec, "--check-bounds=yes") push!(test_exeflags.exec, "--startup-file=no") push!(test_exeflags.exec, "--depwarn=yes") push!(test_exeflags.exec, "--project=$(Base.active_project())") const test_exename = popfirst!(test_exeflags.exec) function addworker(X; kwargs...) withenv("JULIA_NUM_THREADS" => 1, "OPENBLAS_NUM_THREADS" => 1) do procs = addprocs(X; exename=test_exename, exeflags=test_exeflags, kwargs...) @everywhere procs include($(joinpath(@__DIR__, "setup.jl"))) procs end end addworker(min(jobs, length(tests))) # pretty print information about gc and mem usage testgroupheader = "Test" workerheader = "(Worker)" name_align = maximum([textwidth(testgroupheader) + textwidth(" ") + textwidth(workerheader); map(x -> textwidth(x) + 3 + ndigits(nworkers()), tests)]) elapsed_align = textwidth("Time (s)") gc_align = textwidth("GC (s)") percent_align = textwidth("GC %") alloc_align = textwidth("Alloc (MB)") rss_align = textwidth("RSS (MB)") printstyled(" "^(name_align + textwidth(testgroupheader) - 3), " | ") printstyled(" | ---------------- CPU ---------------- |\n", color=:white) printstyled(testgroupheader, color=:white) printstyled(lpad(workerheader, name_align - textwidth(testgroupheader) + 1), " | ", color=:white) printstyled("Time (s) | GC (s) | GC % | Alloc (MB) | RSS (MB) |\n", color=:white) print_lock = stdout isa Base.LibuvStream ? 
stdout.lock : ReentrantLock() if stderr isa Base.LibuvStream stderr.lock = print_lock end function print_testworker_stats(test, wrkr, resp) @nospecialize resp lock(print_lock) try printstyled(test, color=:white) printstyled(lpad("($wrkr)", name_align - textwidth(test) + 1, " "), " | ", color=:white) time_str = @sprintf("%7.2f",resp[2]) printstyled(lpad(time_str, elapsed_align, " "), " | ", color=:white) cpu_gc_str = @sprintf("%5.2f", resp[4]) printstyled(lpad(cpu_gc_str, gc_align, " "), " | ", color=:white) # since there may be quite a few digits in the percentage, # the left-padding here is less to make sure everything fits cpu_percent_str = @sprintf("%4.1f", 100 * resp[4] / resp[2]) printstyled(lpad(cpu_percent_str, percent_align, " "), " | ", color=:white) cpu_alloc_str = @sprintf("%5.2f", resp[3] / 2^20) printstyled(lpad(cpu_alloc_str, alloc_align, " "), " | ", color=:white) cpu_rss_str = @sprintf("%5.2f", resp[6] / 2^20) printstyled(lpad(cpu_rss_str, rss_align, " "), " |\n", color=:white) finally unlock(print_lock) end end global print_testworker_started = (name, wrkr)->begin end function print_testworker_errored(name, wrkr) lock(print_lock) try printstyled(name, color=:red) printstyled(lpad("($wrkr)", name_align - textwidth(name) + 1, " "), " |", " "^elapsed_align, " failed at $(now())\n", color=:red) finally unlock(print_lock) end end # run tasks t0 = now() results = [] all_tasks = Task[] all_tests = copy(tests) try # Monitor stdin and kill this task on ^C # but don't do this on Windows, because it may deadlock in the kernel t = current_task() running_tests = Dict{String, DateTime}() if !Sys.iswindows() && isa(stdin, Base.TTY) stdin_monitor = @async begin term = REPL.Terminals.TTYTerminal("xterm", stdin, stdout, stderr) try REPL.Terminals.raw!(term, true) while true c = read(term, Char) if c == '\x3' Base.throwto(t, InterruptException()) break elseif c == '?' 
println("Currently running: ") tests = sort(collect(running_tests), by=x->x[2]) foreach(tests) do (test, date) println(test, " (running for ", round(now()-date, Minute), ")") end end end catch e isa(e, InterruptException) || rethrow() finally REPL.Terminals.raw!(term, false) end end end @sync begin function recycle_worker(p) rmprocs(p, waitfor=30) return nothing end for p in workers() @async begin push!(all_tasks, current_task()) while length(tests) > 0 test = popfirst!(tests) # sometimes a worker failed, and we need to spawn a new one if p === nothing p = addworker(1)[1] end wrkr = p local resp # run the test running_tests[test] = now() try resp = remotecall_fetch(runtests, wrkr, test_runners[test], test) catch e isa(e, InterruptException) && return resp = Any[e] end delete!(running_tests, test) push!(results, (test, resp)) # act on the results if resp[1] isa Exception print_testworker_errored(test, wrkr) do_quickfail && Base.throwto(t, InterruptException()) # the worker encountered some failure, recycle it # so future tests get a fresh environment p = recycle_worker(p) else print_testworker_stats(test, wrkr, resp) cpu_rss = resp[6] if haskey(ENV, "CI") && cpu_rss > 3*2^30 # XXX: collecting garbage # after each test, we are leaking CPU memory somewhere. # this is a problem on CI, where2 we don't have much RAM. # work around this by periodically recycling the worker. 
p = recycle_worker(p) end end end if p !== nothing recycle_worker(p) end end end end catch e isa(e, InterruptException) || rethrow() # If the test suite was merely interrupted, still print the # summary, which can be useful to diagnose what's going on foreach(task -> begin istaskstarted(task) || return istaskdone(task) && return try schedule(task, InterruptException(); error=true) catch ex @error "InterruptException" exception=ex,catch_backtrace() end end, all_tasks) for t in all_tasks # NOTE: we can't just wait, but need to discard the exception, # because the throwto for --quickfail also kills the worker. try wait(t) catch e showerror(stderr, e) end end finally if @isdefined stdin_monitor schedule(stdin_monitor, InterruptException(); error=true) end end t1 = now() elapsed = canonicalize(Dates.CompoundPeriod(t1-t0)) println("Testing finished in $elapsed") # construct a testset to render the test results o_ts = Test.DefaultTestSet("Overall") Test.push_testset(o_ts) completed_tests = Set{String}() for (testname, (resp,)) in results push!(completed_tests, testname) if isa(resp, Test.DefaultTestSet) Test.push_testset(resp) Test.record(o_ts, resp) Test.pop_testset() elseif isa(resp, Tuple{Int,Int}) fake = Test.DefaultTestSet(testname) for i in 1:resp[1] Test.record(fake, Test.Pass(:test, nothing, nothing, nothing, nothing)) end for i in 1:resp[2] Test.record(fake, Test.Broken(:test, nothing)) end Test.push_testset(fake) Test.record(o_ts, fake) Test.pop_testset() elseif isa(resp, RemoteException) && isa(resp.captured.ex, Test.TestSetException) println("Worker $(resp.pid) failed running test $(testname):") Base.showerror(stdout, resp.captured) println() fake = Test.DefaultTestSet(testname) for i in 1:resp.captured.ex.pass Test.record(fake, Test.Pass(:test, nothing, nothing, nothing, nothing)) end for i in 1:resp.captured.ex.broken Test.record(fake, Test.Broken(:test, nothing)) end for t in resp.captured.ex.errors_and_fails Test.record(fake, t) end Test.push_testset(fake) 
Test.record(o_ts, fake) Test.pop_testset() else if !isa(resp, Exception) resp = ErrorException(string("Unknown result type : ", typeof(resp))) end # If this test raised an exception that is not a remote testset exception, # i.e. not a RemoteException capturing a TestSetException that means # the test runner itself had some problem, so we may have hit a segfault, # deserialization errors or something similar. Record this testset as Errored. fake = Test.DefaultTestSet(testname) Test.record(fake, Test.Error(:nontest_error, testname, nothing, Any[(resp, [])], LineNumberNode(1))) Test.push_testset(fake) Test.record(o_ts, fake) Test.pop_testset() end end for test in all_tests (test in completed_tests) && continue fake = Test.DefaultTestSet(test) Test.record(fake, Test.Error(:test_interrupted, test, nothing, [("skipped", [])], LineNumberNode(1))) Test.push_testset(fake) Test.record(o_ts, fake) Test.pop_testset() end println() Test.print_test_results(o_ts, 1) if !o_ts.anynonpass println(" \033[32;1mSUCCESS\033[0m") else println(" \033[31;1mFAILURE\033[0m\n") Test.print_test_errors(o_ts) throw(Test.FallbackTestSetException("Test run finished with errors")) end ================================================ FILE: test/setup.jl ================================================ using Distributed, Test, oneAPI oneAPI.functional() || error("oneAPI.jl is not functional on this system") # GPUArrays has a testsuite that isn't part of the main package. # Include it directly. import GPUArrays gpuarrays = pathof(GPUArrays) gpuarrays_root = dirname(dirname(gpuarrays)) include(joinpath(gpuarrays_root, "test", "testsuite.jl")) testf(f, xs...; kwargs...) = TestSuite.compare(f, oneArray, xs...; kwargs...) 
const eltypes = [Int16, Int32, Int64,
                 Complex{Int16}, Complex{Int32}, Complex{Int64},
                 Float16, Float32, ComplexF32]

# add ComplexF16 when the device module advertises FP16 support
# (plain Float16 is always part of the list above)
const float16_supported =
    oneL0.module_properties(device()).fp16flags & oneL0.ZE_DEVICE_MODULE_FLAG_FP16 ==
    oneL0.ZE_DEVICE_MODULE_FLAG_FP16
if float16_supported
    append!(eltypes, [#=Float16,=# ComplexF16])
end

# add double-precision element types only when the device module supports FP64
const float64_supported =
    oneL0.module_properties(device()).fp64flags & oneL0.ZE_DEVICE_MODULE_FLAG_FP64 ==
    oneL0.ZE_DEVICE_MODULE_FLAG_FP64
if float64_supported
    append!(eltypes, [Float64, ComplexF64])
end

TestSuite.supported_eltypes(::Type{<:oneArray}) = eltypes

# Level Zero debug layers, as requested through the environment
const validation_layer = parse(Bool, get(ENV, "ZE_ENABLE_VALIDATION_LAYER", "false"))
const parameter_validation = parse(Bool, get(ENV, "ZE_ENABLE_PARAMETER_VALIDATION", "false"))

using Random


## entry point

"""
    runtests(f, name)

Run the test function `f` inside a freshly-generated anonymous module in `Main`,
wrapped in a `@testset` named `name`, with testset printing disabled for the
duration. The master process (pid 1) is notified via `print_testworker_started`
before testing starts.

Returns the `@timed` data as a vector — with the testset itself replaced by a
`(passes, broken)` count tuple when every test passed — followed by this
worker's `Sys.maxrss()`.
"""
function runtests(f, name)
    old_print_setting = Test.TESTSET_PRINT_ENABLE[]
    Test.TESTSET_PRINT_ENABLE[] = false
    try
        # generate a temporary module to execute the tests in
        mod_name = Symbol("Test", rand(1:100), "Main_", replace(name, '/' => '_'))
        mod = @eval(Main, module $mod_name end)
        @eval(mod, using Test, Random, oneAPI)

        let id = myid()
            wait(@spawnat 1 print_testworker_started(name, id))
        end

        ex = quote
            GC.gc(true)
            Random.seed!(1)
            oneAPI.allowscalar(false)

            @timed @testset $"$name" begin
                $f()
            end
        end
        data = Core.eval(mod, ex)
        #data[1] is the testset

        # process results
        cpu_rss = Sys.maxrss()
        if VERSION >= v"1.11.0-DEV.1529"
            # on recent Julia, Test.get_test_counts returns a TestCounts struct.
            # NOTE: this used to bind `error` instead of `errors`, inconsistently
            # with the branch below and shadowing Base.error; fixed.
            tc = Test.get_test_counts(data[1])
            passes,fails,errors,broken,c_passes,c_fails,c_errors,c_broken =
                tc.passes, tc.fails, tc.errors, tc.broken,
                tc.cumulative_passes, tc.cumulative_fails,
                tc.cumulative_errors, tc.cumulative_broken
        else
            passes,fails,errors,broken,c_passes,c_fails,c_errors,c_broken =
                Test.get_test_counts(data[1])
        end
        if data[1].anynonpass == false
            # success: don't ship the (potentially large) testset back to the
            # master, only the pass/broken counts
            data = ((passes+c_passes,broken+c_broken),
                    data[2], data[3], data[4], data[5])
        end
        res = vcat(collect(data), cpu_rss)

        GC.gc(true)
        res
    finally
        Test.TESTSET_PRINT_ENABLE[] = old_print_setting
    end
end

## auxiliary
stuff

# NOTE: based on test/pkg.jl::capture_stdout, but doesn't discard exceptions
# Evaluate `ex` while capturing stdout into a temporary file; the expansion
# yields a `(result, captured_output::String)` tuple.
macro grab_output(ex)
    quote
        mktemp() do fname, fout
            ret = nothing
            # redirect stdout into the temporary file for the duration of `ex`
            open(fname, "w") do fout
                redirect_stdout(fout) do
                    ret = $(esc(ex))
                end
            end
            ret, read(fname, String)
        end
    end
end

# Run some code on-device
# Wraps the trailing expression in a zero-argument kernel and launches it with
# `@oneapi`, forwarding any leading arguments as launch keywords; the launch is
# wrapped in `oneAPI.@sync`. `@gensym` keeps the kernel name hygienic.
macro on_device(ex...)
    code = ex[end]
    kwargs = ex[1:end-1]

    @gensym kernel
    esc(quote
        let
            function $kernel()
                $code

                return
            end

            oneAPI.@sync @oneapi $(kwargs...) $kernel()
        end
    end)
end

# helper function for sinking a value to prevent the callee from getting optimized away
# (a volatile store/load round-trip through a stack slot defeats constant folding)
@inline sink(i::Int32) =
    Base.llvmcall("""%slot = alloca i32
                     store volatile i32 %0, i32* %slot
                     %value = load volatile i32, i32* %slot
                     ret i32 %value""", Int32, Tuple{Int32}, i)
@inline sink(i::Int64) =
    Base.llvmcall("""%slot = alloca i64
                     store volatile i64 %0, i64* %slot
                     %value = load volatile i64, i64* %slot
                     ret i64 %value""", Int64, Tuple{Int64}, i)

nothing # File is loaded via a remotecall to "include". Ensure it returns "nothing".

================================================
FILE: test/sorting.jl
================================================
using Test
using oneAPI

@testset "sorting" begin
    # in-place ascending sort
    data = oneArray([3, 1, 4, 1, 5])
    sort!(data)
    @test Array(data) == [1, 1, 3, 4, 5]

    # in-place descending sort
    data_rev = oneArray([3, 1, 4, 1, 5])
    sort!(data_rev, rev = true)
    @test Array(data_rev) == [5, 4, 3, 1, 1]

    # sortperm on-device must agree with Base's sortperm on the same input
    data = oneArray([3, 1, 4, 1, 5])
    @test Array(sortperm(data)) == sortperm([3, 1, 4, 1, 5])

    data_rev = oneArray([3, 1, 4, 1, 5])
    @test Array(sortperm(data_rev, rev = true)) == sortperm([3, 1, 4, 1, 5], rev = true)
end

================================================
FILE: test/sycl.jl
================================================
if Sys.iswindows()
    @warn "Skipping unsupported SYCL tests"
else
    using oneAPI.oneL0, oneAPI.SYCL

    # wrap Level Zero platform/device/context/queue handles as SYCL objects
    @test sycl_platform() isa syclPlatform

    ze_dev = device()
    sycl_dev = sycl_device(ze_dev)
    @test sycl_dev isa syclDevice

    ze_ctx = context()
    sycl_ctx = sycl_context(ze_ctx, ze_dev)
    @test sycl_ctx isa syclContext

    ze_queue = ZeCommandQueue(ze_ctx, ze_dev)
    @test sycl_queue(ze_queue) isa syclQueue

    # events: allocate a pool of one event, take it, and wrap it for SYCL
    ze_event_pool = ZeEventPool(ze_ctx, 1, ze_dev)
    ze_event = ze_event_pool[1]
    sycl_event = oneAPI.SYCL.syclEvent(sycl_ctx, ze_event)
end