Full Code of JuliaGPU/oneAPI.jl for AI

master 2140c1fa0f76 cached
134 files
1.9 MB
519.5k tokens
919 symbols
1 requests
Download .txt
Showing preview only (1,966K chars total). Download the full file or copy to clipboard to get everything.
Repository: JuliaGPU/oneAPI.jl
Branch: master
Commit: 2140c1fa0f76
Files: 134
Total size: 1.9 MB

Directory structure:
gitextract_pys1ksq7/

├── .buildkite/
│   └── pipeline.yml
├── .github/
│   ├── dependabot.yml
│   └── workflows/
│       ├── CompatHelper.yml
│       ├── DocsCleanup.yml
│       ├── Format.yml
│       ├── TagBot.yml
│       ├── ci.yml
│       └── docs.yml
├── .gitignore
├── CITATION.cff
├── LICENSE.md
├── Project.toml
├── README.md
├── codecov.yml
├── deps/
│   ├── .clang-format
│   ├── .gitignore
│   ├── CMakeLists.txt
│   ├── Project.toml
│   ├── build_ci.jl
│   ├── build_local.jl
│   ├── generate_helpers.jl
│   ├── generate_interfaces.jl
│   ├── onemkl_epilogue.cpp
│   ├── onemkl_epilogue.h
│   ├── onemkl_prologue.cpp
│   ├── onemkl_prologue.h
│   └── src/
│       ├── onemkl.cpp
│       ├── onemkl.h
│       ├── onemkl_dft.cpp
│       ├── onemkl_dft.h
│       ├── sycl.cpp
│       ├── sycl.h
│       └── sycl.hpp
├── docs/
│   ├── Project.toml
│   ├── make.jl
│   └── src/
│       ├── api/
│       │   ├── arrays.md
│       │   ├── compiler.md
│       │   ├── context.md
│       │   ├── kernels.md
│       │   └── memory.md
│       ├── api.md
│       ├── arrays.md
│       ├── device.md
│       ├── getting_started.md
│       ├── index.md
│       ├── installation.md
│       ├── kernels.md
│       ├── level_zero.md
│       ├── memory.md
│       ├── onemkl.md
│       ├── troubleshooting.md
│       └── usage/
│           └── performance.md
├── examples/
│   ├── gemm.jl
│   └── vadd.jl
├── lib/
│   ├── level-zero/
│   │   ├── barrier.jl
│   │   ├── cmdlist.jl
│   │   ├── cmdqueue.jl
│   │   ├── common.jl
│   │   ├── context.jl
│   │   ├── copy.jl
│   │   ├── device.jl
│   │   ├── driver.jl
│   │   ├── error.jl
│   │   ├── event.jl
│   │   ├── fence.jl
│   │   ├── libze.jl
│   │   ├── libze_aliases.jl
│   │   ├── memory.jl
│   │   ├── module.jl
│   │   ├── oneL0.jl
│   │   ├── pointer.jl
│   │   ├── residency.jl
│   │   └── utils.jl
│   ├── mkl/
│   │   ├── array.jl
│   │   ├── fft.jl
│   │   ├── interfaces.jl
│   │   ├── linalg.jl
│   │   ├── oneMKL.jl
│   │   ├── utils.jl
│   │   ├── wrappers_blas.jl
│   │   ├── wrappers_lapack.jl
│   │   └── wrappers_sparse.jl
│   ├── support/
│   │   ├── Support.jl
│   │   └── liboneapi_support.jl
│   ├── sycl/
│   │   └── SYCL.jl
│   └── utils/
│       ├── APIUtils.jl
│       └── enum.jl
├── res/
│   ├── Project.toml
│   ├── libze_prologue.jl
│   ├── local.jl
│   ├── support.toml
│   ├── wrap.jl
│   └── ze.toml
├── src/
│   ├── accumulate.jl
│   ├── array.jl
│   ├── broadcast.jl
│   ├── compiler/
│   │   ├── compilation.jl
│   │   ├── execution.jl
│   │   └── reflection.jl
│   ├── context.jl
│   ├── device/
│   │   ├── array.jl
│   │   ├── atomics.jl
│   │   ├── quirks.jl
│   │   └── runtime.jl
│   ├── gpuarrays.jl
│   ├── indexing.jl
│   ├── mapreduce.jl
│   ├── memory.jl
│   ├── oneAPI.jl
│   ├── oneAPIKernels.jl
│   ├── pool.jl
│   ├── random.jl
│   ├── sorting.jl
│   └── utils.jl
└── test/
    ├── Project.toml
    ├── array.jl
    ├── device/
    │   └── intrinsics.jl
    ├── dummy.bc
    ├── dummy.ll
    ├── dummy.spt
    ├── dummy.spv
    ├── examples.jl
    ├── execution.jl
    ├── fft.jl
    ├── indexing.jl
    ├── kernelabstractions.jl
    ├── level-zero.jl
    ├── onemkl.jl
    ├── pointer.jl
    ├── random.jl
    ├── runtests.jl
    ├── setup.jl
    ├── sorting.jl
    └── sycl.jl

================================================
FILE CONTENTS
================================================

================================================
FILE: .buildkite/pipeline.yml
================================================
steps:
  # Test supported Julia versions
  # One job is generated per entry in `matrix.setup.julia`; the group's key
  # ("julia") is what the "Special" group's `depends_on` refers to.
  - group: ":julia: Julia"
    key: "julia"
    steps:
      - label: "Julia {{matrix.julia}}"
        plugins:
          - JuliaCI/julia#v1:
              version: "{{matrix.julia}}"
          - JuliaCI/julia-test#v1:
              test_args: "--quickfail"
          - JuliaCI/julia-coverage#v1:
              dirs:
                - src
                - lib
                - examples
        agents:
          queue: "juliagpu"
          intel: "*"
        # Runs deps/build_ci.jl inside the `deps` project environment.
        commands: |
          julia --project=deps deps/build_ci.jl
        # Skip the job when the commit message contains "[skip tests]".
        if: build.message !~ /\[skip tests\]/
        timeout_in_minutes: 120
        matrix:
          setup:
            julia:
              - "1.10"
              - "1.11"
              - "1.12"
              - "nightly"
          # Failures of the "nightly" job are soft failures.
          adjustments:
            - with:
                julia: "nightly"
              soft_fail: true

  # Special tests
  - group: ":eyes: Special"
    depends_on: "julia"
    steps:
      # Runs the test suite on Julia 1.11 with the Level Zero validation layer
      # enabled and with the package switched to debug JLLs. Failures are soft.
      - label: "Validation"
        plugins:
          - JuliaCI/julia#v1:
              version: "1.11"
          - JuliaCI/julia-test#v1:
              julia_args: "-g2"
          - JuliaCI/julia-coverage#v1:
              codecov: true
              dirs:
                - src
                - lib
                - examples
        command: |
          julia --project=deps deps/build_ci.jl
          julia --project -e '
              # use debug JLLs, for asserts + better backtraces
              using oneAPI
              oneAPI.set_debug!(true)'
        # Environment for the validation layer; EnableDebugBreak=0 is the
        # workaround the README documents for intel/compute-runtime#639.
        env:
          ZE_ENABLE_VALIDATION_LAYER: '1'
          ZE_ENABLE_PARAMETER_VALIDATION: '1'
          EnableDebugBreak: '0'
        agents:
          queue: "juliagpu"
          intel: "*"
        # Skipped for "[skip tests]" commits and for draft pull requests.
        # (A mapping may only carry one `if` key, so this is the single condition.)
        if: build.message !~ /\[skip tests\]/ && !build.pull_request.draft
        timeout_in_minutes: 60
        soft_fail: true

env:
  JULIA_PKG_SERVER_REGISTRY_PREFERENCE: "eager" # OK to download JLLs from GitHub
  SECRET_CODECOV_TOKEN: "OYpS8fj3vGhj7iZf9vLAeapyxQNSOEW6mApcSvGboL9AlS+0nfOSFjFrIBNnIU0prxQQy1gR9AwR/JO1m2OFWeRhjYtkQPPhk4xVtSKmv0LLTL0snA8IohUopqfu722i7zLrPcz/A0LFIFsb0ey+oReJs2xnGOshNIJu4FDowUV3wmZvfKWNsSK4cGN+HFQ3387Ow4SsmiUr7oqh0iMBQNqaY8oZ2BY1dFOgPaOegIp70YEFRdJ8DKaLd7WGxFLY9oQEhZZdmx/zx0xo56/NGtDwVYkDPa4qPhJczDBoIn5XvcRiIW0VJ/MaRARxnpenBX5H6gwdcZYUGtjXWIRXBw==;U2FsdGVkX1/bZy1Bp4/dBH5scPpWqLKusXGvSkRGUa+1F7hi4P4Cu5a6GcfNIEvQr+bBj2VlZvqhNW0FAqN3QQ=="


================================================
FILE: .github/dependabot.yml
================================================
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version: 2
updates:
  - package-ecosystem: "github-actions"
    directory: "/" # Location of package manifests
    schedule:
      interval: "monthly"


================================================
FILE: .github/workflows/CompatHelper.yml
================================================
name: CompatHelper

on:
  schedule:
    - cron: '0 0 * * *'
  workflow_dispatch:

jobs:
  CompatHelper:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
      # Extract the quoted value of the `julia = "..."` compat entry from
      # Project.toml and expose it as the step output `version`, which the
      # setup-julia step reads via `steps.julia_compat.outputs.version`.
      - name: Get Julia compatibility
        id: julia_compat
        # NOTE: this requires a Julia compat lower-bound with minor version!
        run: |
          version=$(grep '^julia = ' Project.toml | grep -o '".*"' | cut -d '"' -f2)
          echo "version=$version" >> "$GITHUB_OUTPUT"
      - uses: julia-actions/setup-julia@v2
        with:
          version: ${{ steps.julia_compat.outputs.version }}
      - name: Install CompatHelper
        run: |
          import Pkg
          name = "CompatHelper"
          version = "3"
          Pkg.add(; name, version)
        shell: julia --color=yes {0}
      - name: Run CompatHelper
        run: |
          using CompatHelper
          CompatHelper.main()
        shell: julia --color=yes {0}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}


================================================
FILE: .github/workflows/DocsCleanup.yml
================================================
name: Doc Preview Cleanup

on:
  pull_request:
    types: [closed]

jobs:
  doc-preview-cleanup:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout gh-pages branch
        uses: actions/checkout@v6
        with:
          ref: gh-pages

      # Remove this PR's documentation preview from gh-pages and rebuild the
      # branch as a single history-free commit. A PR that never published a
      # preview has no directory to delete; in that case nothing is changed
      # instead of failing on `git rm`.
      - name: Delete preview and history
        id: cleanup
        run: |
            if [ -d "previews/PR$PRNUM" ]; then
                git config user.name "oneAPI.jl"
                git config user.email "oneapi@juliagpu.github.io"
                git rm -rf "previews/PR$PRNUM"
                git commit -m "delete preview"
                git branch gh-pages-new $(echo "delete history" | git commit-tree HEAD^{tree})
                echo "deleted=true" >> "$GITHUB_OUTPUT"
            fi
        env:
            PRNUM: ${{ github.event.number }}

      # Force-push only when the previous step actually created gh-pages-new.
      - name: Push changes
        if: steps.cleanup.outputs.deleted == 'true'
        run: |
            git push --force origin gh-pages-new:gh-pages

================================================
FILE: .github/workflows/Format.yml
================================================
name: 'Format'

on:
  pull_request_target:
    paths: ['**/*.jl']
    types: [opened, synchronize, reopened, ready_for_review]

permissions:
  contents: read
  actions: write
  pull-requests: write

jobs:
  runic:
    runs-on: ubuntu-latest
    if: github.event.pull_request.draft == false
    steps:
      # Check out the pull request's head branch from its source repository
      # with full history (fetch-depth: 0), so a later step can compute the
      # merge base against the upstream branch.
      # NOTE(review): this workflow is triggered by pull_request_target and is
      # granted write permissions while operating on PR-provided files —
      # confirm that no step executes code from the checked-out tree.
      - name: Check out repository
        uses: actions/checkout@v6
        with:
          ref: ${{github.event.pull_request.head.ref}}
          repository: ${{github.event.pull_request.head.repo.full_name}}
          fetch-depth: 0

      - name: Add upstream remote
        run: |
          git remote add upstream https://github.com/${{ github.repository }}
          git fetch upstream

      - name: Setup Julia
        uses: julia-actions/setup-julia@v2
        with:
          version: '1'
          arch: 'x64'
      - uses: julia-actions/cache@v2

      - name: Install Runic
        run: |
          julia --project=@runic -e 'using Pkg; Pkg.add("Runic")'
          curl -o git-runic https://raw.githubusercontent.com/fredrikekre/Runic.jl/master/bin/git-runic
          chmod +x git-runic
          sudo mv git-runic /usr/local/bin

      # Diff the PR against its merge base with `git runic` and publish two
      # step outputs: `exit_code` and the multi-line `diff`. The comment steps
      # that follow act on exit_code 1 (post suggestions) and 0 (refresh a
      # stale comment); an exit code of 2 makes this step itself fail.
      - name: Run Runic
        id: runic
        run: |
          set +e
          MERGE_BASE=$(git merge-base upstream/${{ github.base_ref }} HEAD) || exit 1
          DIFF=$(git runic --diff $MERGE_BASE)
          EXIT_CODE=$?

          echo "exit_code=$EXIT_CODE" >> $GITHUB_OUTPUT
          echo "diff<<EOF" >> $GITHUB_OUTPUT
          echo "$DIFF" >> $GITHUB_OUTPUT
          echo "EOF" >> $GITHUB_OUTPUT

          # if Runic failed, bail out
          [ $EXIT_CODE -eq 2 ] && exit 1 || exit 0

      - name: Find comment
        uses: peter-evans/find-comment@v4
        id: find-comment
        with:
          issue-number: ${{ github.event.pull_request.number }}
          comment-author: 'github-actions[bot]'
          body-includes: '<!-- runic-format-summary -->'

      - name: Comment formatting suggestions
        if: steps.runic.outputs.exit_code == 1
        uses: peter-evans/create-or-update-comment@v5
        with:
          comment-id: ${{ steps.find-comment.outputs.comment-id }}
          issue-number: ${{ github.event.pull_request.number }}
          body: |
            <!-- runic-format-summary -->

            Your PR requires formatting changes to meet the project's style guidelines.
            Please consider running [Runic](https://github.com/fredrikekre/Runic.jl) (`git runic ${{ github.base_ref }}`) to apply these changes.

            <details>
            <summary>Click here to view the suggested changes.</summary>

            ~~~diff
            ${{ steps.runic.outputs.diff }}
            ~~~

            </details>
          edit-mode: replace

      - name: Update stale comment
        if: steps.runic.outputs.exit_code == 0 && steps.find-comment.outputs.comment-id
        uses: peter-evans/create-or-update-comment@v5
        with:
          comment-id: ${{ steps.find-comment.outputs.comment-id }}
          issue-number: ${{ github.event.pull_request.number }}
          body: |
            <!-- runic-format-summary -->

            Your PR no longer requires formatting changes. Thank you for your contribution!
          edit-mode: replace

      # XXX: if Github ever supports allow-failure (actions/runner#2347)
      #- name: Propagate exit code
      #  run: |
      #    exit ${{ steps.runic.outputs.exit_code }}


================================================
FILE: .github/workflows/TagBot.yml
================================================
name: TagBot

on:
  issue_comment:
    types:
      - created
  workflow_dispatch:

jobs:
  # Runs only for manual dispatches, or when the triggering actor is the
  # JuliaTagBot account; all other issue comments leave the job skipped.
  TagBot:
    if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
    runs-on: ubuntu-latest
    steps:
      - uses: JuliaRegistries/TagBot@v1
        with:
          token: ${{ secrets.GITHUB_TOKEN }}


================================================
FILE: .github/workflows/ci.yml
================================================
name: CI

on:
  push:
    branches:
      - master
    tags: '*'
  pull_request:
    types: [opened, synchronize, reopened]
  schedule:
    - cron: '0 0 * * 0'

jobs:
  # Builds and tests the package on a self-hosted Linux x64 runner.
  # The job itself, the build step and the test step are all marked
  # continue-on-error.
  self-runner:
    continue-on-error: true
    env:
      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    runs-on:
      - self-hosted
      - linux
      - X64
    strategy:
      matrix:
        # Of these three axes, only `julia-version` is read by the steps below.
        os:
          - ubuntu-latest
        julia-version:
          - '1'
        julia-arch:
          - x64

    steps:
      - uses: actions/checkout@v6
      - uses: julia-actions/setup-julia@latest
        with:
          version: ${{ matrix.julia-version }}
      - uses: julia-actions/cache@v2
      - uses: julia-actions/julia-buildpkg@latest
        continue-on-error: true
      - uses: julia-actions/julia-runtest@latest
        continue-on-error: true


================================================
FILE: .github/workflows/docs.yml
================================================
name: Documentation

on:
  push:
    branches:
      - master
    tags: '*'
  pull_request:
    types: [opened, synchronize, reopened]
  schedule:
    - cron: '0 0 * * 0'

jobs:
  # Builds the documentation by running docs/make.jl in the docs/ project on a
  # self-hosted Linux x64 runner, using the Julia "lts" release.
  docs:
    name: Build documentation
    env:
      DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}
      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      JULIA_DEBUG: Documenter
    runs-on:
      - self-hosted
      - linux
      - X64

    steps:
      - uses: actions/checkout@v6
      - uses: julia-actions/setup-julia@latest
        with:
          version: 'lts'
      - uses: julia-actions/cache@v2
      - uses: julia-actions/julia-buildpkg@latest
      - run: julia --project=docs/ docs/make.jl


================================================
FILE: .gitignore
================================================
LocalPreferences.toml
Manifest.toml
deps/onemkl_blas.cpp
deps/onemkl_blas.h
deps/onemkl_lapack.cpp
deps/onemkl_lapack.h
deps/onemkl_sparse.cpp
deps/onemkl_sparse.h
docs/build


================================================
FILE: CITATION.cff
================================================
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
authors:
  - family-names: Besard
    given-names: Tim
    orcid: https://orcid.org/0000-0001-7826-8021
copyright: "© 2022 Julia Computing, and other contributors"
title: "oneAPI.jl"
version: 0.3.0
doi: 10.5281/zenodo.7139359
date-released: 2022-10-03
url: "https://github.com/JuliaGPU/oneAPI.jl"


================================================
FILE: LICENSE.md
================================================
The oneAPI.jl package is licensed under the MIT "Expat" License:

> Copyright (c) 2020-present: Julia Computing and other contributors
>
> All Rights Reserved.
>
> Permission is hereby granted, free of charge, to any person obtaining a copy
> of this software and associated documentation files (the "Software"), to deal
> in the Software without restriction, including without limitation the rights
> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> copies of the Software, and to permit persons to whom the Software is
> furnished to do so, subject to the following conditions:
>
> The above copyright notice and this permission notice shall be included in all
> copies or substantial portions of the Software.
>
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> SOFTWARE.
>


================================================
FILE: Project.toml
================================================
name = "oneAPI"
uuid = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"
authors = ["Tim Besard <tim.besard@gmail.com>", "Alexis Montoison", "Michel Schanen <michel.schanen@gmail.com>"]
version = "2.6.1"

[deps]
AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
AcceleratedKernels = "6a4ca0a5-0e36-4168-a932-d9be78d558f1"
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
CEnum = "fa961155-64e5-5f13-b03f-caf6b980ea82"
ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55"
GPUToolbox = "096a3bc2-3ced-46d0-87f4-dd12716f4bfc"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
NEO_jll = "700fe977-ac61-5f37-bbc8-c6c4b2b6a9fd"
Preferences = "21216c6a-2e73-6563-6e65-726566657250"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SPIRVIntrinsics = "71d1d633-e7e8-4a92-83a1-de8814b09ba8"
SPIRV_LLVM_Translator_jll = "4a5d46fc-d8cf-5151-a261-86b458210efb"
SPIRV_Tools_jll = "6ac6d60f-d740-5983-97d7-a4482c0689f4"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
oneAPI_Level_Zero_Headers_jll = "f4bc562b-d309-54f8-9efb-476e56f0410d"
oneAPI_Level_Zero_Loader_jll = "13eca655-d68d-5b81-8367-6d99d727ab01"
oneAPI_Support_jll = "b049733a-a71d-5ed3-8eba-7d323ac00b36"

[compat]
AbstractFFTs = "1.5.0"
AcceleratedKernels = "0.3.1, 0.4"
Adapt = "4"
CEnum = "0.4, 0.5"
ExprTools = "0.1"
GPUArrays = "11.2.1"
GPUCompiler = "1.6"
GPUToolbox = "0.1, 0.2, 0.3, 1"
KernelAbstractions = "0.9.39"
LLVM = "6, 7, 8, 9"
NEO_jll = "=25.44.36015"
Preferences = "1"
SPIRVIntrinsics = "0.5"
SPIRV_LLVM_Translator_jll = "21"
SPIRV_Tools_jll = "2025.4.0"
SpecialFunctions = "1.3, 2"
StaticArrays = "1"
julia = "1.10"
oneAPI_Level_Zero_Loader_jll = "1.25"
oneAPI_Support_jll = "0.9.2"

[extras]
libigc_jll = "94295238-5935-5bd7-bb0f-b00942e9bdd5"


================================================
FILE: README.md
================================================
# oneAPI.jl

*Julia support for the oneAPI programming toolkit.*

[![][doi-img]][doi-url] [![][buildkite-img]][buildkite-url] [![][codecov-img]][codecov-url] [![][docs-stable-img]][docs-stable-url] [![][docs-dev-img]][docs-dev-url]

[doi-img]: https://zenodo.org/badge/252466420.svg
[doi-url]: https://zenodo.org/badge/latestdoi/252466420

[buildkite-img]: https://badge.buildkite.com/00fff01fd4d6cdd905e61e2ce7ed0f7203ba227df9b575426c.svg?branch=master
[buildkite-url]: https://buildkite.com/julialang/oneapi-dot-jl

[codecov-img]: https://codecov.io/gh/JuliaGPU/oneAPI.jl/branch/master/graph/badge.svg
[codecov-url]: https://codecov.io/gh/JuliaGPU/oneAPI.jl

[docs-stable-img]: https://img.shields.io/badge/docs-stable-blue.svg
[docs-stable-url]: https://juliagpu.github.io/oneAPI.jl/stable

[docs-dev-img]: https://img.shields.io/badge/docs-dev-blue.svg
[docs-dev-url]: https://juliagpu.github.io/oneAPI.jl/dev

oneAPI.jl provides support for working with the [oneAPI unified programming
model](https://software.intel.com/en-us/oneapi). The package is verified to work with the
(currently) only implementation of this interface [that is part of the Intel Compute
Runtime](https://github.com/intel/compute-runtime), only available on Linux.
Windows support is experimental.


## Status

**oneAPI.jl is looking for contributors and/or a maintainer. Reach out if you can help!**

The current version of oneAPI.jl supports most of the oneAPI Level Zero interface, has
good kernel programming capabilities, and as a demonstration of that it fully implements
the GPUArrays.jl array interfaces. This results in a full-featured GPU array type.

However, the package has not been extensively tested, and performance issues might be
present. The integration with vendor libraries like oneMKL has been extended with support
for sparse linear algebra operations. Some operations may still be unavailable or slow.


## Quick start

You need to use Julia 1.10 or higher, and it is strongly advised to use [the official
binaries](https://julialang.org/downloads/). For now, only Linux is supported.
On Windows, you need to use the second generation Windows Subsystem for Linux (WSL2).
**If you're using Intel Arc GPUs (A580, A750, A770, etc), you need to use at least
Linux 6.2.** For other hardware, any recent Linux distribution should work.

Once you have installed Julia, proceed by entering the package manager REPL mode by pressing
`]` and adding the oneAPI package:

```
pkg> add oneAPI
```

This installation will take a couple of minutes to download necessary binaries, such as the
oneAPI loader, several SPIR-V tools, etc. For now, the oneAPI.jl package also depends on
[the Intel implementation](https://github.com/intel/compute-runtime) of the oneAPI spec.
That means you need compatible hardware; refer to the Intel documentation for more details.

Once you have oneAPI.jl installed, perform a smoke test by calling the `versioninfo()` function:

```julia
julia> using oneAPI

julia> oneAPI.versioninfo()
Binary dependencies:
- NEO: 25.35.35096
- libigc: 1.0.17193+0
- gmmlib: 22.3.20+0
- SPIRV_LLVM_Translator: 21
- SPIRV_Tools: 2025.4.0
- oneAPI_Support: 0.9.2 (oneMKL v2025.2.0)

Toolchain:
- Julia: 1.11.5
- LLVM: 16.0.6

1 driver:
- 00000000-0000-0000-173d-d94201036013 (v1.3.24595, API v1.3.0)

2 devices:
- Intel(R) Graphics [0x56a0]
- Intel(R) HD Graphics P630 [0x591d]
```

If you have multiple compatible drivers or devices, use the `driver!` and `device!`
functions to configure which one to use in the current task:

```julia
julia> devices()
ZeDevice iterator for 2 devices:
1. Intel(R) Graphics [0x56a0]
2. Intel(R) HD Graphics P630 [0x591d]

julia> device()
ZeDevice(GPU, vendor 0x8086, device 0x56a0): Intel(R) Graphics [0x56a0]

julia> device!(2)
ZeDevice(GPU, vendor 0x8086, device 0x591d): Intel(R) HD Graphics P630 [0x591d]
```

To ensure other functionality works as expected, you can run the test suite from the package
manager REPL mode. Note that this will pull and run the test suite for
[GPUArrays](https://github.com/JuliaGPU/GPUArrays.jl), which takes quite some time:

```
pkg> test oneAPI
...
Testing finished in 16 minutes, 27 seconds, 506 milliseconds

Test Summary: | Pass  Total  Time
  Overall     | 4945   4945
    SUCCESS
     Testing oneAPI tests passed
```


## Usage

The functionality of oneAPI.jl is organized as follows:

- low-level wrappers for the Level Zero library
- kernel programming capabilities
- abstractions for high-level array programming

The Level Zero wrappers are available in the `oneL0` submodule, and expose all flexibility
of the underlying APIs with user-friendly wrappers:

```julia
julia> using oneAPI, oneAPI.oneL0

julia> drv = first(drivers());

julia> ctx = ZeContext(drv);

julia> dev = first(devices(drv))
ZeDevice(GPU, vendor 0x8086, device 0x1912): Intel(R) Gen9

julia> compute_properties(dev)
(maxTotalGroupSize = 256, maxGroupSizeX = 256, maxGroupSizeY = 256, maxGroupSizeZ = 256, maxGroupCountX = 4294967295, maxGroupCountY = 4294967295, maxGroupCountZ = 4294967295, maxSharedLocalMemory = 65536, subGroupSizes = (8, 16, 32))

julia> queue = ZeCommandQueue(ctx, dev);

julia> execute!(queue) do list
         append_barrier!(list)
       end
```

Built on top of that are kernel programming capabilities for executing Julia code on oneAPI
accelerators. For now, we reuse OpenCL intrinsics, and compile to SPIR-V using [Khronos'
translator](https://github.com/KhronosGroup/SPIRV-LLVM-Translator):

```julia
julia> function kernel()
         barrier(0)
         return
       end

julia> @oneapi items=1 kernel()
```

Code reflection macros are available to see the generated code:

```julia
julia> @device_code_llvm @oneapi items=1 kernel()
```

```llvm
;  @ REPL[18]:1 within `kernel'
define dso_local spir_kernel void @_Z17julia_kernel_3053() local_unnamed_addr {
top:
;  @ REPL[18]:2 within `kernel'
; ┌ @ oneAPI.jl/src/device/opencl/synchronization.jl:9 within `barrier' @ oneAPI.jl/src/device/opencl/synchronization.jl:9
; │┌ @ oneAPI.jl/src/device/opencl/utils.jl:34 within `macro expansion'
    call void @_Z7barrierj(i32 0)
; └└
;  @ REPL[18]:3 within `kernel'
  ret void
}
```

```julia
julia> @device_code_spirv @oneapi items=1 kernel()
```

```spirv
; SPIR-V
; Version: 1.0
; Generator: Khronos LLVM/SPIR-V Translator; 14
; Bound: 9
; Schema: 0
               OpCapability Addresses
               OpCapability Kernel
          %1 = OpExtInstImport "OpenCL.std"
               OpMemoryModel Physical64 OpenCL
               OpEntryPoint Kernel %4 "_Z17julia_kernel_3067"
               OpSource OpenCL_C 200000
               OpName %top "top"
       %uint = OpTypeInt 32 0
     %uint_2 = OpConstant %uint 2
     %uint_0 = OpConstant %uint 0
       %void = OpTypeVoid
          %3 = OpTypeFunction %void
          %4 = OpFunction %void None %3
        %top = OpLabel
               OpControlBarrier %uint_2 %uint_2 %uint_0
               OpReturn
               OpFunctionEnd

```

Finally, the `oneArray` type makes it possible to use your oneAPI accelerator without the
need to write custom kernels, thanks to Julia's high-level array abstractions:

```julia
julia> a = oneArray(rand(Float32, 2,2))
2×2 oneArray{Float32,2}:
 0.592979  0.996154
 0.874364  0.232854

julia> a .+ 1
2×2 oneArray{Float32,2}:
 1.59298  1.99615
 1.87436  1.23285
```

The oneMKL integration provides extended support for linear algebra operations, including sparse
matrix operations that integrate with Julia's standard LinearAlgebra interface:

```julia
julia> using oneAPI, oneAPI.oneMKL, SparseArrays, LinearAlgebra
julia> A = sprand(100, 100, 0.1)
julia> dA = oneMKL.oneSparseMatrixCSC(A)
julia> x = oneArray(rand(100))
julia> y = dA * x  # Matrix-vector multiplication via LinearAlgebra
```

### `Float64` support

Not all oneAPI GPUs support Float64 datatypes. You can test if your GPU does using
the following code:

```julia
julia> using oneAPI
julia> oneL0.module_properties(device()).fp64flags & oneL0.ZE_DEVICE_MODULE_FLAG_FP64 == oneL0.ZE_DEVICE_MODULE_FLAG_FP64
false
```

If your GPU doesn't, executing code that relies on Float64 values will result in an error:

```julia
julia> oneArray([1.]) .+ 1
┌ Error: Module compilation failed:
│
│ error: Double type is not supported on this platform.
```



## Development

To work on oneAPI.jl, you just need to `dev` the package. In addition, you may need to
**build the binary support library** that's used to interface with oneMKL and other C++
vendor libraries. This library is normally provided by the oneAPI_Support_jll.jl package,
however, we only guarantee to update this package when releasing oneAPI.jl. You can build
this library yourself by simply executing `deps/build_local.jl`.

To facilitate development, there are other things you may want to configure:

### Enabling the oneAPI validation layer

The oneAPI Level Zero libraries feature a so-called validation layer, which
validates the arguments to API calls. This can be useful to spot potential
issues, and can be enabled by setting the following environment variables:

- `ZE_ENABLE_VALIDATION_LAYER=1`
- `ZE_ENABLE_PARAMETER_VALIDATION=1`
- `EnableDebugBreak=0` (this is needed to work around intel/compute-runtime#639)

### Using a debug toolchain

If you're experiencing an issue with the underlying toolchain (NEO, IGC, etc), you may
want to use a debug build of these components, which also perform additional
validation. This can be done simply by calling `oneAPI.set_debug!(true)` and restarting
your Julia session. This sets a preference used by the respective JLL packages.

### Using a local toolchain

To further debug the toolchain, you may need a custom build and point oneAPI.jl towards it.
This can also be done using preferences, overriding the paths to resources provided by the
various JLLs that oneAPI.jl uses. A helpful script to automate this is provided in the
`res` folder of this repository:

```
$ julia res/local.jl

Trying to find local IGC...
- found libigc at /usr/local/lib/libigc.so
- found libiga64 at /usr/local/lib/libiga64.so
- found libigdfcl at /usr/local/lib/libigdfcl.so
- found libopencl-clang at /usr/local/lib/libopencl-clang.so.11

Trying to find local gmmlib...
- found libigdgmm at /usr/local/lib/libigdgmm.so

Trying to find local NEO...
- found libze_intel_gpu.so.1 at /usr/local/lib/libze_intel_gpu.so.1
- found libigdrcl at /usr/local/lib/intel-opencl/libigdrcl.so

Trying to find local oneAPI loader...
- found libze_loader at /lib/x86_64-linux-gnu/libze_loader.so
- found libze_validation_layer at /lib/x86_64-linux-gnu/libze_validation_layer.so

Writing preferences...
```

The discovered paths will be written to a global file with preferences, typically
`$HOME/.julia/environments/vX.Y/LocalPreferences.toml` (where `vX.Y` refers to the Julia
version you are using). You can modify this file, or remove it when you want to revert to
the default set of binaries.


================================================
FILE: codecov.yml
================================================
# Codecov configuration: lists paths to leave out of coverage and turns off the
# patch, project and changes statuses.
# NOTE(review): Codecov's reference documents `ignore` as a top-level key —
# confirm that nesting it under `coverage` is honored.
coverage:
  ignore:
    - "lib/*/lib*.jl"
    - "src/device"
    - "res/"
  status:
    patch: false
    project: false
    changes: false


================================================
FILE: deps/.clang-format
================================================
---
IndentWidth: '4'
MaxEmptyLinesToKeep: '2'
...


================================================
FILE: deps/.gitignore
================================================
liboneapilib.so
Manifest.toml


================================================
FILE: deps/CMakeLists.txt
================================================
# Build script for the `oneapi_support` shared library, compiled from the
# wrapper sources under src/ and linked against oneMKL, SYCL and OpenCL.
cmake_minimum_required(VERSION 3.13)

# Require C++17; do not fall back to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)


project(oneAPISupport)

# The single target of this project: a shared library built from the SYCL,
# oneMKL and oneMKL DFT wrapper sources.
add_library(oneapi_support SHARED
  src/sycl.h
  src/sycl.hpp
  src/sycl.cpp
  src/onemkl.h
  src/onemkl.cpp
  src/onemkl_dft.h
  src/onemkl_dft.cpp
)

target_link_libraries(oneapi_support
  mkl_sycl
  # DFT component libraries needed for oneMKL DFT template instantiations
  mkl_sycl_dft
  mkl_cdft_core
  mkl_intel_ilp64
  mkl_sequential
  mkl_core
  sycl
  OpenCL

  # XXX: we don't want to link against this plugin, but otherwise the run-time
  #      loader doesn't find it (since it's located in the non-global Conda
  #      library directory, and we can't set LD_LIBRARY_PATH from within Julia).
  ur_adapter_level_zero
)

# Install the shared library into <prefix>/lib.
install(TARGETS oneapi_support
        LIBRARY DESTINATION lib)


================================================
FILE: deps/Project.toml
================================================
[deps]
CMake_jll = "3f4e10e2-61f2-5801-8945-23b9d642d0e6"
Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Git = "d7ba0133-e1db-5d97-8f8c-041e4b3a1eb2"
Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
Ninja_jll = "76642167-d241-5cee-8c94-7a494e8cb7b7"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Preferences = "21216c6a-2e73-6563-6e65-726566657250"
Scratch = "6c6a2e73-6563-6170-7368-637461726353"
oneAPI_Level_Zero_Headers_jll = "f4bc562b-d309-54f8-9efb-476e56f0410d"
oneAPI_Support_Headers_jll = "24f86df5-245d-5634-a4cc-32433d9800b3"

[compat]
oneAPI_Support_Headers_jll = "=2025.2.0"


================================================
FILE: deps/build_ci.jl
================================================
# Decide whether the support library shipped by oneAPI_Support_jll can be used as-is,
# or whether the wrappers in `deps/src` changed after the JLL was tagged, in which case
# the library is rebuilt locally through `build_local.jl`.

using Pkg
Pkg.activate(@__DIR__)
Pkg.instantiate()

using Git, Scratch, Dates

oneAPI = Base.UUID("8f75cd03-7ff8-4ecb-9b8f-daf728133b1b")

# get scratch directories
# (`support_dir` holds a persistent clone of the JLL repository, used to look up tags)
support_dir = get_scratch!(oneAPI, "support")

# is this a full-fledged check-out?
if isdir(joinpath(@__DIR__, "..", ".git"))
    # determine latest change to the wrappers (commit time, as a Unix timestamp)
    deps_timestamp = parse(Int, read(`$(git()) -C $(@__DIR__) log -1 --format=%ct src`, String))
    @info "Latest change to the wrappers: $(unix2datetime(deps_timestamp))"

    # find out which version of oneAPI_Support_jll we are using
    Pkg.activate(joinpath(@__DIR__, ".."))
    Pkg.instantiate()
    deps = collect(values(Pkg.dependencies()))
    filter!(deps) do dep
        dep.name == "oneAPI_Support_jll"
    end
    library_version = only(deps).version
    @info "oneAPI_Support_jll version: $(library_version)"

    # compare to the JLL's tags
    #
    # The clone lives in the scratch directory: it is created on first use and only
    # refreshed afterwards. The existence check and the git commands must target the
    # same directory, otherwise the `fetch` would run outside of any repository.
    if !isdir(joinpath(support_dir, ".git"))
        run(`$(git()) clone -q https://github.com/JuliaBinaryWrappers/oneAPI_Support_jll.jl $support_dir`)
    else
        run(`$(git()) -C $support_dir fetch -q --tags`)
    end
    jll_tags = let tags = Dict{String,Int}()
        # each line is "<tag name> <creation time as Unix timestamp>"
        for line in eachline(`$(git()) -C $support_dir tag --format "%(refname:short) %(creatordate:unix)"`)
            tag, timestamp = split(line)
            tags[tag] = parse(Int, timestamp)
        end
        tags
    end
    jll_tag = "oneAPI_Support-v$(library_version)"
    haskey(jll_tags, jll_tag) ||
        error("Could not find tag $(jll_tag) in the oneAPI_Support_jll.jl repository")
    jll_timestamp = jll_tags[jll_tag]
    @info "oneAPI_Support_jll timestamp: $(unix2datetime(jll_timestamp))"

    if deps_timestamp > jll_timestamp
        @info "Wrappers have changed since the last JLL build. Building the support library locally."
        include(joinpath(@__DIR__, "build_local.jl"))
    else
        @info "Wrappers have not changed since the last JLL build. Using the JLL's support library."
    end
else
    @warn """oneAPI.jl source code is not checked-out from Git.
             This means we cannot check for changes, and need to unconditionally build the support library."""
    include(joinpath(@__DIR__, "build_local.jl"))
end


================================================
FILE: deps/build_local.jl
================================================
# Compile liboneapi_support (C wrappers for C++ APIs) with a toolchain installed
# through Conda, then register the resulting library with oneAPI_Support_jll by
# writing a preference.

using Pkg
Pkg.activate(@__DIR__)
Pkg.instantiate()

# On Buildkite, leave a note on the build that a local library is in use.
haskey(ENV, "BUILDKITE") &&
    run(`buildkite-agent annotate 'Using a locally-built support library; A bump of oneAPI_Support_jll is required before releasing this packages.' --style 'warning' --context 'ctx-deps'`)

using Scratch, Preferences, CMake_jll, Ninja_jll, oneAPI_Level_Zero_Headers_jll

oneAPI = Base.UUID("8f75cd03-7ff8-4ecb-9b8f-daf728133b1b")

# Scratch spaces: one for the Conda environment, one for the install tree.
# The install tree is wiped so that every build starts from a clean prefix.
conda_dir = get_scratch!(oneAPI, "conda")
install_dir = get_scratch!(oneAPI, "deps")
rm(install_dir; recursive=true)

# The build directory is the first command-line argument, or a temporary directory.
build_dir = isempty(ARGS) ? mktempdir() : ARGS[1]
mkpath(build_dir)

# Toolchain installation.
try
    using Conda
catch err
    # Importing Conda can fail when its environment is missing, which is most
    # likely the result of a skipped build step; run it explicitly and retry.
    Pkg.build("Conda")
    using Conda
end
# Same situation as above: no root environment means Conda was never built.
isdir(Conda.ROOTENV) || Pkg.build("Conda")
if !isfile(joinpath(conda_dir, "condarc-julia.yml"))
    Conda.create(conda_dir)
    # conda#8850
    conda_meta_dir = joinpath(conda_dir, "conda-meta")
    mkpath(conda_meta_dir)
    touch(joinpath(conda_meta_dir, "history"))
end
Conda.add_channel("https://software.repos.intel.com/python/conda/", conda_dir)
Conda.add(["dpcpp_linux-64=2025.2.0", "mkl-devel-dpcpp=2025.2.0"], conda_dir)

Conda.list(conda_dir)

# XXX: isn't there a Conda package providing ze_api.hpp?
include_dir = joinpath(oneAPI_Level_Zero_Headers_jll.artifact_dir, "include")

# Configure, build and install with the Conda toolchain visible to CMake.
conda_lib_dir = Conda.lib_dir(conda_dir)
build_env = ("PATH" => "$(ENV["PATH"]):$(Conda.bin_dir(conda_dir))",
             "LD_LIBRARY_PATH" => conda_lib_dir)
withenv(build_env...) do
    cmake() do cmake_path
        ninja() do ninja_path
            run(```$cmake_path -DCMAKE_CXX_COMPILER="icpx"
                               -DCMAKE_CXX_FLAGS="-fsycl -isystem $(conda_dir)/include -isystem $include_dir -fdiagnostics-color=always"
                               -DCMAKE_INSTALL_RPATH=$(conda_lib_dir)
                               -DCMAKE_INSTALL_PREFIX=$install_dir
                               -GNinja -S $(@__DIR__) -B $build_dir```)
            run(`$cmake_path --build $(build_dir) --target install`)
        end
    end
end

# TODO: adapt when we support more platforms
lib_path = joinpath(install_dir, "lib", "liboneapi_support.so")
@assert ispath(lib_path)

# Make oneAPI_Support_jll load the freshly built library rather than its artifact.
preferences_file = joinpath(dirname(@__DIR__), "LocalPreferences.toml")
set_preferences!(
    preferences_file,
    "oneAPI_Support_jll",
    "liboneapi_support_path" => lib_path;
    force=true,
)

# Pkg.jl#2500 workaround: the test environment needs its own copy of the preferences.
cp(preferences_file,
   joinpath(dirname(@__DIR__), "test", "LocalPreferences.toml"); force=true)


================================================
FILE: deps/generate_helpers.jl
================================================
# Names of sparse routines that `analyzer_template` does not treat as templated:
# for the "sparse" library, a routine listed here is skipped by the template
# analysis and gets empty parameter/type/version/suffix lists.
non_parametric_routines = ["init_matrix_handle", "release_matrix_handle", "set_matrix_property",
"init_matmat_descr", "release_matmat_descr", "set_matmat_data", "get_matmat_data", "matmat",
"omatcopy", "sort_matrix", "optimize_gemv", "optimize_gemm", "optimize_trmv", "optimize_trsv", "optimize_trsm",
"init_omatconvert_descr", "release_omatconvert_descr", "init_omatadd_descr", "release_omatadd_descr",
"omatconvert_buffer_size", "omatconvert_analyze", "omatconvert_get_nnz", "omatconvert",
"omatadd_buffer_size", "omatadd_analyze", "omatadd_get_nnz"]

"""
    analyzer_template(library, cpp_headers, name_routine)

Look up how the routine `name_routine` is declared through the
`ONEMKL_DECLARE_<prefix><NAME>(...)` macros found in the text `cpp_headers`.
The prefix is `SPARSE_` for the "sparse" library and `BUF_` otherwise.

Only the "blas" library and the routines of the "sparse" library that are not in
`non_parametric_routines` are analysed; for anything else all returned lists are empty.

Returns a tuple `(list_parameters, list_types, list_versions, list_suffix)`:
- `list_parameters::Vector{String}`: names of the template parameters of the macro
  (e.g. `["T", "Ts"]`), empty when no known parameter list matches;
- `list_types::Vector{Vector{String}}`: one entry per instantiation found, with the
  concrete C++ types passed to the macro;
- `list_versions::Vector{String}`: the version tag associated with each instantiation
  ("H", "S", "D", "C", "Z", "CS" or "ZD");
- `list_suffix::Vector{String}`: the name suffix associated with each instantiation
  ("" or "_64").

A warning is emitted when the routine is analysed but none of the known parameter
lists is found.
"""
function analyzer_template(library::String, cpp_headers::String, name_routine::String)
  list_parameters = String[]
  list_types = Vector{String}[]
  list_versions = String[]
  list_suffix = String[]

  if (library == "blas") || (library == "sparse" && !(name_routine ∈ non_parametric_routines))
    prefix = (library == "sparse") ? "SPARSE_" : "BUF_"
    declaration = "ONEMKL_DECLARE_$(prefix)$(uppercase(name_routine))"

    # Known template parameter lists. They are tried in this order and every match
    # overwrites the previous one, so the last matching entry wins.
    known_parameters = (["T"],
                        ["FpType"],
                        ["Tf", "Ti"],
                        ["T", "Ts"],
                        ["IntType", "FpType"],
                        ["Ta", "Tb", "Tc", "Ts"],
                        ["T", "Tres"],
                        ["T", "Treal"],
                        ["T", "Tc", "Ts"],
                        ["T", "Tc"])
    for parameters in known_parameters
      occursin("$(declaration)($(join(parameters, ", ")))", cpp_headers) && (list_parameters = parameters)
    end

    isempty(list_parameters) && @warn("Unable to determine the parametric parameters of $(name_routine).")

    # Known instantiations as (types, version, suffix), grouped by number of types.
    # The order of this table defines the order of the returned lists.
    known_instantiations = [
      # one type
      (["sycl::half"], "H", ""),
      (["float"], "S", ""),
      (["double"], "D", ""),
      (["std::complex<float>"], "C", ""),
      (["std::complex<double>"], "Z", ""),
      # two types
      (["int32_t","float"], "S", ""),
      (["int64_t","float"], "S", "_64"),
      (["int32_t","double"], "D", ""),
      (["int64_t","double"], "D", "_64"),
      (["int32_t","std::complex<float>"], "C", ""),
      (["int64_t","std::complex<float>"], "C", "_64"),
      (["int32_t","std::complex<double>"], "Z", ""),
      (["int64_t","std::complex<double>"], "Z", "_64"),
      (["float","int32_t"], "S", ""),
      (["float","int64_t"], "S", "_64"),
      (["double","int32_t"], "D", ""),
      (["double","int64_t"], "D", "_64"),
      (["std::complex<float>","int32_t"], "C", ""),
      (["std::complex<float>","int64_t"], "C", "_64"),
      (["std::complex<double>","int32_t"], "Z", ""),
      (["std::complex<double>","int64_t"], "Z", "_64"),
      (["sycl::half","sycl::half"], "H", ""),
      (["float","float"], "S", ""),
      (["double","double"], "D", ""),
      (["std::complex<float>","float"], "CS", ""),
      (["std::complex<double>","double"], "ZD", ""),
      (["std::complex<float>","std::complex<float>"], "C", ""),
      (["std::complex<double>","std::complex<double>"], "Z", ""),
      # three types
      (["sycl::half","sycl::half","sycl::half"], "H", ""),
      (["float","float","float"], "S", ""),
      (["double","double","double"], "D", ""),
      (["std::complex<float>","float","float"], "CS", ""),
      (["std::complex<float>","float","std::complex<float>"], "C", ""),
      (["std::complex<double>","double","double"], "ZD", ""),
      (["std::complex<double>","double","std::complex<double>"], "Z", ""),
      # four types
      (["sycl::half","sycl::half","sycl::half","sycl::half"], "H", ""),
      (["float","float","float","float"], "S", ""),
      (["double","double","double","double"], "D", ""),
      (["std::complex<float>","std::complex<float>","std::complex<float>","std::complex<float>"], "C", ""),
      (["std::complex<double>","std::complex<double>","std::complex<double>","std::complex<double>"], "Z", ""),
    ]
    for (type, version, suffix) in known_instantiations
      # The macro arguments are separated by ", " in the headers.
      if occursin("$(declaration)($(join(type, ", ")))", cpp_headers)
        push!(list_types, type)
        push!(list_versions, version)
        push!(list_suffix, suffix)
      end
    end
  end

  return list_parameters, list_types, list_versions, list_suffix
end


================================================
FILE: deps/generate_interfaces.jl
================================================
using oneAPI_Support_Headers_jll

include("generate_helpers.jl")

# Location of the oneMKL C++ headers inside the oneAPI_Support_Headers_jll artifact.
include_dir = joinpath(oneAPI_Support_Headers_jll.artifact_dir, "include")
# Header files grouped per library.
# NOTE(review): presumably passed as the `filename` argument of the generators below — confirm at the call sites.
blas = [joinpath(include_dir, "oneapi", "mkl", "blas", "buffer_decls.hpp")]
lapack = [joinpath(include_dir, "oneapi", "mkl", "lapack", "lapack.hpp"),
          joinpath(include_dir, "oneapi", "mkl", "lapack", "scratchpad.hpp")]
sparse = [joinpath(include_dir, "oneapi", "mkl", "spblas", "sparse_structures.hpp"),
          joinpath(include_dir, "oneapi", "mkl", "spblas", "sparse_auxiliary.hpp"),
          joinpath(include_dir, "oneapi", "mkl", "spblas", "sparse_operations.hpp")]

# Index => version letter.
dict_version = Dict{Int, Char}(1 => 'S', 2 => 'D', 3 => 'C', 4 => 'Z')

# Version letter => C++ scalar type.
version_types = Dict{Char, String}('S' => "float",
                                   'D' => "double",
                                   'C' => "std::complex<float>",
                                   'Z' => "std::complex<double>")

# Version letter => C scalar type, as written in the generated C signatures.
version_types_header = Dict{Char, String}('S' => "float",
                                          'D' => "double",
                                          'C' => "float _Complex",
                                          'Z' => "double _Complex")

# A header line whose stripped text starts with one of these prefixes is dropped
# by `generate_headers` before the declarations are analysed.
comments = ["namespace", "#", "}", "/*", "*", "//", "[[", "ONEMKL_DECLARE_", "ONEMKL_INLINE_DECLARE"]

# NOTE(review): not used in the part of the file visible here; presumably the routines
# whose C++ counterpart returns nothing — confirm where it is consumed.
void_output = ["init_matrix_handle", "init_matmat_descr", "release_matmat_descr", "set_matmat_data",
               "get_matmat_data", "init_omatadd_descr", "init_omatconvert_descr"]

"""
    generate_headers(library, filename, output; pattern="")

Turn the C++ declarations found in the header files `filename` into C signatures
named `onemkl<version><routine>[suffix]`, and write them to `joinpath(@__DIR__, output)`.

Arguments:
- `library`: "blas", "lapack" or "sparse"; selects the library-specific filters and rewrites.
- `filename`: paths of the C++ headers, read and concatenated in the given order.
- `output`: name of the generated file, relative to the directory of this script.
- `pattern`: text inserted in front of the name of scalar arguments of type `short`,
  `float`, `double`, `float _Complex` and `double _Complex` in the emitted signatures.

Returns the vector of emitted signatures as tuples
`(header, name_routine, version, type_routine, template)`, where `type_routine` is
"usm", "buffer" or "scratchpad_size" and `template` tells whether the original
declaration was a template.

A routine that has more overloads than expected (more than 4, or more than 8 for
`set_csr_data`, `set_coo_data` and `_batch` routines) is reported with a warning and
left out of the output.
"""
function generate_headers(library::String, filename::Vector{String}, output::String; pattern::String="")
  # Number of declarations seen per routine, keyed by `name_routine * type_routine`
  routines = Dict{String,Int}()
  signatures = []
  signatures2 = []
  cpp_headers = ""
  for file in filename
    cpp_headers = cpp_headers * read(file, String)
  end
  # Normalize the raw text so that every declaration ends with a ';'
  cpp_headers = replace(cpp_headers, "std::int32_t" => "int32_t")
  cpp_headers = replace(cpp_headers, "std::int64_t" => "int64_t")
  cpp_headers = replace(cpp_headers, "; \\" => ";")
  cpp_headers = replace(cpp_headers, ")\n\n" => ");\n\n")
  cpp_headers = replace(cpp_headers, "\\\n" => "\n")
  cpp_headers = replace(cpp_headers, "sycl::event\n" => "sycl::event ")
  headers = ""

  # Remove comments
  # (also drops every line that contains a double quote; kept lines are glued without separator)
  for header in split(cpp_headers, '\n')
    mapreduce(x -> !startswith(strip(header), x) && !occursin("\"", header), &, comments) && (headers *= header)
  end

  # Analyse each header
  headers = split(headers, ';')
  for (i, header) in enumerate(headers)
    # We only generate C interfaces for exported symbols
    !occursin("DLL_EXPORT", header) && !occursin("_scratchpad_size", header) && continue

    # We don't want to interface routines with the following types, parameters or names
    occursin("class", header) && continue
    occursin("span", header) && continue
    occursin("bfloat16", header) && continue
    occursin("::int8_t", header) && continue
    (library == "lapack") && occursin("void", header) && continue # We only want USM routines
    (library == "sparse") && occursin("trsv", header) && !occursin("optimize_trsv", header) && !occursin("alpha", header) && continue  # SPARSE routine
    occursin("(matrix_handle_t SpMat", header) && continue  # SPARSE routine
    occursin("set_csr_data(matrix_handle_t", header) && continue  # SPARSE routine
    occursin("release_matrix_handle(matrix_handle_t", header) && continue  # SPARSE routine
    occursin("get_matmat_data", header) && continue  # SPARSE routine
    occursin("matmat(", header) && continue  # SPARSE routine
    bool = occursin("release", header) || occursin("init", header)
    (library == "sparse") && occursin("omatconvert", header) && !bool && continue  # SPARSE routine
    (library == "sparse") && occursin("omatadd", header) && !bool && continue  # SPARSE routine
    occursin("gemm_bias", header) && continue  # BLAS routine
    occursin("getri_batch", header) && occursin("ldainv", header) && continue  # LAPACK routine

    # Check if the routine is a template
    template = occursin("template", header)
    if template
      header = replace(header, "template <typename fp, oneapi::mkl::lapack::internal::is_floating_point<fp> = nullptr>         " => "")
      header = replace(header, "template <typename fp, oneapi::mkl::lapack::internal::is_real_floating_point<fp> = nullptr>    " => "")
      header = replace(header, "template <typename fp, oneapi::mkl::lapack::internal::is_complex_floating_point<fp> = nullptr> " => "")

      header = replace(header, "template <typename data_t, oneapi::mkl::lapack::internal::is_floating_point<data_t> = nullptr>" => "")
      header = replace(header, "template <typename data_t, oneapi::mkl::lapack::internal::is_real_floating_point<data_t> = nullptr>" => "")
      header = replace(header, "template <typename data_t, oneapi::mkl::lapack::internal::is_complex_floating_point<data_t> = nullptr>" => "")
      header = replace(header, "template <typename fp_type, internal::is_floating_point<fp_type> = nullptr>" => "")
      header = replace(header, "template <typename fp_type, internal::is_real_floating_point<fp_type> = nullptr>" => "")
      header = replace(header, "template <typename fp_type, internal::is_complex_floating_point<fp_type> = nullptr>" => "")
    end

    # Classify the routine and drop the trailing list of event dependencies
    type_routine = ""
    if occursin("_scratchpad_size", header)
      type_routine = "scratchpad_size"
    elseif occursin("sycl::event", header)
      header = replace(header, "const std::vector<sycl::event> &events = {}" => "")
      header = replace(header, "const std::vector<sycl::event> &event_list = {}" => "")
      header = replace(header, "std::vector<sycl::event> &dependencies = {}" => "")
      header = replace(header, "std::vector<sycl::event> &dependencies" => "")  # typo in "onemkl_sparse.cpp"
      type_routine = "usm"
    else
      type_routine = "buffer"
    end

    # Add a space for the returned argument
    header = replace(header, "sycl::event" => "sycl::event ")
    header = replace(header, "void" => "void ")

    # Replace the types
    header = replace(header, "sycl::queue &queue" => "syclQueue_t device_queue")
    header = replace(header, "sycl::queue& queue" => "syclQueue_t device_queue")

    if library ∈ ("blas", "sparse")
      header = replace(header, "compute_mode mode = MKL_BLAS_COMPUTE_MODE" => "")
      header = replace(header, "index_base base=index_base::zero" => "onemklIndex base")

      header = replace(header, "sycl::buffer<Ta> &" => "Ta *")
      header = replace(header, "sycl::buffer<Tb> &" => "Tb *")
      header = replace(header, "sycl::buffer<Tc> &" => "Tc *")
      header = replace(header, "sycl::buffer<Td> &" => "Td *")
      header = replace(header, "sycl::buffer<Treal> &" => "Treal *")
      header = replace(header, "sycl::buffer<Tres> &" => "Tres *")
      header = replace(header, "sycl::buffer<T> &" => "T *")

      header = replace(header, "sycl::buffer<Ta, 1> &" => "Ta *")
      header = replace(header, "sycl::buffer<Tb, 1> &" => "Tb *")
      header = replace(header, "sycl::buffer<Tc, 1> &" => "Tc *")
      header = replace(header, "sycl::buffer<Td, 1> &" => "Td *")
      header = replace(header, "sycl::buffer<Ti, 1> &" => "Ti *")
      header = replace(header, "sycl::buffer<Tf, 1> &" => "Tf *")
      header = replace(header, "sycl::buffer<Treal, 1> &" => "Treal *")
      header = replace(header, "sycl::buffer<Tres, 1> &" => "Tres *")
      header = replace(header, "sycl::buffer<T,1> &" => "T *")
      header = replace(header, "sycl::buffer<T, 1> &" => "T *")
      header = replace(header, "sycl::buffer<FpType, 1> &" => "FpType *")
      header = replace(header, "sycl::buffer<IntType, 1> &" => "IntType *")
    end

    header = replace(header, "sycl::buffer<float> &" => "float *")
    header = replace(header, "sycl::buffer<float>  &" => "float *")
    header = replace(header, "sycl::buffer<double> &" => "double *")
    header = replace(header, "sycl::buffer<std::complex<float>> &" => "float _Complex *")
    header = replace(header, "sycl::buffer<std::complex<float>>  &" => "float _Complex *")
    header = replace(header, "sycl::buffer<std::complex<double>> &" => "double _Complex *")
    header = replace(header, "sycl::buffer<int32_t> &" => "int32_t *")
    header = replace(header, "sycl::buffer<int64_t> &" => "int64_t *")

    header = replace(header, "sycl::buffer<float, 1> &" => "float *")
    header = replace(header, "sycl::buffer<double, 1> &" => "double *")
    header = replace(header, "sycl::buffer<std::complex<float>, 1> &" => "float _Complex *")
    header = replace(header, "sycl::buffer<std::complex<double>, 1> &" => "double _Complex *")
    header = replace(header, "sycl::buffer<std::uint8_t, 1> *" => "uint8_t *")
    header = replace(header, "sycl::buffer<int32_t, 1> &" => "int32_t *")
    header = replace(header, "sycl::buffer<int64_t, 1> &" => "int64_t *")
    header = replace(header, "sycl::buffer<int64_t, 1> *" => "int64_t *")

    header = replace(header, "std::complex<float>  *" => "float _Complex *")
    header = replace(header, "std::complex<float> *" => "float _Complex *")
    header = replace(header, "std::complex<double> *" => "double _Complex *")

    header = replace(header, "template <>\n" => "")
    header = replace(header, "<std::complex<float>>" => "")
    header = replace(header, "<std::complex<double>>" => "")
    header = replace(header, "<float>" => "")
    header = replace(header, "<double>" => "")

    header = replace(header, "oneapi::mkl::transpose" => "onemklTranspose")
    header = replace(header, "oneapi::mkl::uplo" => "onemklUplo")
    header = replace(header, "oneapi::mkl::diag" => "onemklDiag")
    header = replace(header, "oneapi::mkl::side" => "onemklSide")
    header = replace(header, "oneapi::mkl::offset" => "onemklOffset")
    header = replace(header, "oneapi::mkl::job" => "onemklJob")
    header = replace(header, "oneapi::mkl::generate" => "onemklGenerate")
    header = replace(header, "oneapi::mkl::compz" => "onemklCompz")
    header = replace(header, "oneapi::mkl::direct" => "onemklDirect")
    header = replace(header, "oneapi::mkl::storev" => "onemklStorev")
    header = replace(header, "oneapi::mkl::rangev" => "onemklRangev")
    header = replace(header, "oneapi::mkl::order" => "onemklOrder")
    header = replace(header, "oneapi::mkl::jobsvd" => "onemklJobsvd")
    header = replace(header, "oneapi::mkl::layout" => "onemklLayout")
    header = replace(header, "oneapi::mkl::index" => "onemklIndex")
    header = replace(header, "oneapi::mkl::property" => "onemklProperty")
    header = replace(header, "sparse::matmat_descr_t" => "matmat_descr_t")

    # Sanitize the header
    header = replace(header, " \\" => "")
    header = replace(header, "\n" => "")
    header = replace(header, "DLL_EXPORT " => "")
    header = replace(header, "const " => "")
    # Collapse runs of spaces
    for _ = 1:20
      header = replace(header, "  " => " ")
    end
    header = replace(header, "( " => "(")
    header = replace(header, ", )" => ")")
    header = replace(header, ",)" => ")")
    header = replace(header, " void" => "void")
    header = replace(header, " sycl::event" => "sycl::event")
    header = replace(header, "* const* " => "**")
    header = replace(header, "int64_t**" => "int64_t **")

    # The routine name sits between the first space and the first '('
    ind1 = findfirst(' ', header)
    ind2 = findfirst('(', header)
    name_routine = header[ind1+1:ind2-1]
    !haskey(routines, name_routine * type_routine) && (routines[name_routine * type_routine] = 0)
    (name_routine == "gesvd_scratchpad_size") && (routines[name_routine * type_routine] > 1) && continue
    routines[name_routine * type_routine] += 1

    # They use template for BLAS and SPARSE routines
    list_parameters, list_types, list_versions, list_suffix = analyzer_template(library, cpp_headers, name_routine)
    !isempty(list_parameters) && (type_routine == "buffer") && (library == "sparse") && continue  # Only wrap the USM version of sparse routines

    # Infer the version from the scalar types in the signature; later checks take precedence
    version = 'X'
    version = occursin("double", header) ? 'D' : version
    version = occursin("float", header) ? 'S' : version
    version = occursin("float _Complex", header) ? 'C' : version
    version = occursin("double _Complex", header) ? 'Z' : version
    version = occursin("_scratchpad_size", header) ? 'W' : version

    if version == 'W'
      # The version 'W' is used for routines with suffix "_scratchpad_size"
      versions = ('S', 'D', 'C', 'Z')
      mapreduce(x -> startswith(name_routine, x), |, ["or", "sy"]) && !startswith(name_routine, "sytrf") && (versions = ('S', 'D'))
      mapreduce(x -> startswith(name_routine, x), |, ["un", "he"]) && (versions = ('C', 'Z'))
      # One declaration expands into one signature per version
      routines[name_routine * type_routine] = routines[name_routine * type_routine] - 1 + length(versions)
      for blas_version in versions
        copy_header = header
        copy_header = replace(copy_header, "typename fp_type::value_type" => version_types_header[blas_version])
        copy_header = replace(copy_header, "fp_type" => version_types_header[blas_version])
        copy_header = replace(copy_header, "fp" => version_types_header[blas_version])
        copy_header = replace(copy_header, name_routine => "onemkl$(blas_version)$(name_routine)")
        if name_routine ∈ ("heevx_scratchpad_size", "hegvx_scratchpad_size")
          copy_header = replace(copy_header, "typename float _Complex::value_type" => "float")
          copy_header = replace(copy_header, "typename double _Complex::value_type" => "double")
        end
        if occursin("batch", name_routine) && !occursin("*", header)
          copy_header = replace(copy_header, "_batch" => "_batch_strided")
        end
        push!(signatures, (copy_header, name_routine, blas_version, type_routine, template))
      end
    else
      if isempty(list_versions)
        # The routine "optimize_trsm" has two versions.
        suffix = ""
        (name_routine == "optimize_trsm") && occursin("columns", header) && (suffix = "_advanced")
        (name_routine == "optimize_gemm") && occursin("columns", header) && (suffix = "_advanced")
        name_routine ∈ ("set_csr_data", "set_coo_data") && occursin("int64_t", header) && (suffix = "_64")
        occursin("batch", name_routine) && !occursin("**", header) && (suffix = "_strided")

        header = replace(header, "$(name_routine)(" => "onemkl$(version)$(name_routine)$(suffix)(")
        header = replace(header, "void onemkl" => "int onemkl")
        header = replace(header, "sycl::event onemkl" => "int onemkl")
        if library == "sparse"
          if occursin("std::complex", header)
            (version == 'C') && (header = replace(header, "std::complex " => "float _Complex "))
            (version == 'Z') && (header = replace(header, "std::complex " => "double _Complex "))
          end
          header = replace(header, "transpose " => "onemklTranspose ")
          header = replace(header, "uplo " => "onemklUplo ")
          header = replace(header, "diag " => "onemklDiag ")
          header = replace(header, "side " => "onemklSide ")
          header = replace(header, "layout " => "onemklLayout ")
          header = replace(header, "index_base " => "onemklIndex ")
          header = replace(header, "property " => "onemklProperty ")
          header = replace(header, "sparse::matrix_view_descr " => "onemklMatrixView ")
          header = replace(header, "matrix_view_descr " => "onemklMatrixView ")
          header = replace(header, "sparse::matmat_request " => "onemklMatmatRequest ")
          header = replace(header, "omatconvert_alg " => "onemklOmatconvertAlg ")
          header = replace(header, "omatadd_alg " => "onemklOmataddAlg ")
          header = replace(header, name_routine => "sparse_" * name_routine)
        end
        push!(signatures, (header, name_routine, version, type_routine, template))
      else
        # Templated routine: emit one signature per instantiation reported by `analyzer_template`
        n = length(list_parameters)
        for (i, type) in enumerate(list_types)
          version = list_versions[i]
          suffix = list_suffix[i]
          version = (name_routine ∈ ("her", "herk", "her2k", "rotg", "nrm2", "asum", "hpr")) && (version == "CS") ? "C" : version
          version = (name_routine ∈ ("her", "herk", "her2k", "rotg", "nrm2", "asum", "hpr")) && (version == "ZD") ? "Z" : version

          copy_header = header
          # Substitute the template parameters, starting from the last one
          for (j, parameter) in enumerate(reverse(list_parameters))
            k = n-j+1
            copy_header = replace(copy_header, parameter => type[k])
          end
          copy_header = replace(copy_header, "transpose " => "onemklTranspose ")
          copy_header = replace(copy_header, "uplo " => "onemklUplo ")
          copy_header = replace(copy_header, "diag " => "onemklDiag ")
          copy_header = replace(copy_header, "side " => "onemklSide ")
          copy_header = replace(copy_header, "layout " => "onemklLayout ")
          copy_header = replace(copy_header, "index_base " => "onemklIndex ")
          copy_header = replace(copy_header, "std::complex<float>" => "float _Complex")
          copy_header = replace(copy_header, "std::complex<double>" => "double _Complex")
          copy_header = replace(copy_header, "sycl::half" => "short")
          copy_header = replace(copy_header, name_routine => "onemkl$(version)$(name_routine)$(suffix)")
          copy_header = replace(copy_header, "sycl::event onemkl" => "int onemkl")
          copy_header = replace(copy_header, "void onemkl" => "int onemkl")
          if library == "sparse"
            copy_header = replace(copy_header, name_routine => "sparse_" * name_routine)
          end
          if occursin("batch", name_routine) && !occursin("**", header)
            copy_header = replace(copy_header, "_batch" => "_batch_strided")
          end
          if library == "blas"
            # Out-of-place variants of trsm and trmm
            if occursin("trsm", header) && occursin("ldc", header)
              copy_header = replace(copy_header, "trsm" => "trsm_variant")
            end
            if occursin("trmm", header) && occursin("ldc", header)
              copy_header = replace(copy_header, "trmm" => "trmm_variant")
            end
            copy_header = replace(copy_header, "compute_mode mode," => "")
            copy_header = replace(copy_header, ", compute_mode mode)" => ")")
            copy_header = replace(copy_header, "value_or_pointer<float _Complex>" => "float _Complex")
            copy_header = replace(copy_header, "value_or_pointer<double _Complex>" => "double _Complex")
            copy_header = replace(copy_header, "value_or_pointer<short>" => "short")
            copy_header = replace(copy_header, "value_or_pointer<float>" => "float")
            copy_header = replace(copy_header, "value_or_pointer<double>" => "double")
          end
          push!(signatures, (copy_header, name_routine, version, type_routine, template))
        end
      end
    end
  end

  # Check the number of methods
  # (the blacklist holds the same `name_routine * type_routine` keys as `routines`)
  blacklist = String[]
  for name_routine in keys(routines)
    if (routines[name_routine] > 4)
      if occursin("set_csr_data", name_routine) || occursin("set_coo_data", name_routine) || occursin("_batch", name_routine)
        if (routines[name_routine] > 8)
          @warn "The routine $(name_routine) has $(routines[name_routine]) methods and will not be interfaced."
          push!(blacklist, name_routine)
        end
      else
        @warn "The routine $(name_routine) has $(routines[name_routine]) methods and will not be interfaced."
        push!(blacklist, name_routine)
      end
    end
  end

  path_oneapi_headers = joinpath(@__DIR__, output)
  oneapi_headers = open(path_oneapi_headers, "w")

  # Make sure the file is closed even if writing a signature throws
  try
    for (header, name_routine, version, type_routine, template) in signatures
      # Blacklist
      # The lookup must use the key under which the routine was counted.
      ((name_routine * type_routine) in blacklist) && continue

      # Pass scalars (e.g. alpha/beta inputs) as references instead of values
      for type in ("short", "float", "double", "float _Complex", "double _Complex")
        header = replace(header, Regex("$type ([A-Za-z0-9]+(?![^,]*[_*]))[^,]*,") => SubstitutionString("$type $pattern\\1,"))
        header = replace(header, Regex(", $type ([A-Za-z0-9)]+(?![^,]*[_*]))[^,]*") => SubstitutionString(", $type $pattern\\1"))
      end

      push!(signatures2, (header, name_routine, version, type_routine, template))

      # Write the signature word by word, wrapping once about 90 characters are
      # reached and aligning continuation lines under the opening parenthesis
      pos = findfirst('(', header)
      fun = split(header, " ")
      len = 0
      for (i, part) in enumerate(fun)
        len += length(part)
        if len ≤ 90
          (i ≠ 1) && write(oneapi_headers, " ")
          write(oneapi_headers, part)
        else
          write(oneapi_headers, "\n")
          write(oneapi_headers, " "^pos)
          write(oneapi_headers, part)
          len = pos + length(part)
        end
      end
      write(oneapi_headers, ";\n\n")
    end
  finally
    close(oneapi_headers)
  end
  return signatures2
end

# Generate the C++ wrapper file `output` (resolved relative to this script's directory)
# for one oneMKL `library` ("blas", "lapack" or "sparse").
#
# Arguments
# - `library`  : oneMKL namespace used in the emitted calls (`oneapi::mkl::$library::...`).
# - `filename` : forwarded unchanged to `generate_headers`.
# - `output`   : name of the file to write; it is first written by `generate_headers`
#                (which uses the same `output` path) and then truncated and rewritten here.
# - `pattern`  : marker that `generate_headers` inserts in front of scalar parameter names;
#                the script calls this function with "§".
#
# Each signature returned by `generate_headers` becomes one `extern "C"` function whose
# body forwards to the oneMKL routine. The C parameter list is rewritten into a C++
# argument list by the ordered chain of textual replacements below; the order matters
# because later replacements rely on what earlier ones left behind.
#
# `version_types` and `void_output` are defined elsewhere in this file.
function generate_cpp(library::String, filename::Vector{String}, output::String; pattern::String="")
  signatures = generate_headers(library, filename, output; pattern)
  path_oneapi_cpp = joinpath(@__DIR__, output)
  # NOTE(review): the handle is closed only on the normal path (see `close` at the end);
  # an exception raised inside the loop would leave it open.
  oneapi_cpp = open(path_oneapi_cpp, "w")
  for (header, name, version, type_routine, template) in signatures
    # Keep only the text between the first "(" and the following ")": the parameter list.
    parameters = split(header, "(")[2]
    parameters = split(parameters, ")")[1]
    # The queue wrapper is unwrapped to the underlying object stored in `val`.
    parameters = replace(parameters, "syclQueue_t device_queue" => "device_queue->val")
    # Integer parameters are forwarded as-is: drop the type, keep the name.
    parameters = replace(parameters, "int32_t* " => "")
    parameters = replace(parameters, "int32_t " => "")
    parameters = replace(parameters, "int64_t* " => "")
    parameters = replace(parameters, "int64_t " => "")
    # Opaque sparse handles/descriptors are C-cast to their oneapi::mkl::sparse types.
    # Pointer forms are replaced before value forms so the "*" is not left dangling.
    parameters = replace(parameters, "matrix_handle_t *" => "(oneapi::mkl::sparse::matrix_handle_t*) ")
    parameters = replace(parameters, "matrix_handle_t " => "(oneapi::mkl::sparse::matrix_handle_t) ")
    parameters = replace(parameters, "matmat_descr_t *" => "(oneapi::mkl::sparse::matmat_descr_t*) ")
    parameters = replace(parameters, "matmat_descr_t " => "(oneapi::mkl::sparse::matmat_descr_t) ")
    parameters = replace(parameters, "omatadd_descr_t *" => "(oneapi::mkl::sparse::omatadd_descr_t*) ")
    parameters = replace(parameters, "omatadd_descr_t " => "(oneapi::mkl::sparse::omatadd_descr_t) ")
    parameters = replace(parameters, "omatconvert_descr_t *" => "(oneapi::mkl::sparse::omatconvert_descr_t*) ")
    parameters = replace(parameters, "omatconvert_descr_t " => "(oneapi::mkl::sparse::omatconvert_descr_t) ")
    # Half and complex types: "**" first, then "*", then by-value, so the longest match wins.
    # The cast is left without parentheses here; they are added around the name further down.
    parameters = replace(parameters, "short **" => "reinterpret_cast<sycl::half **>")
    parameters = replace(parameters, "float _Complex **" => "reinterpret_cast<std::complex<float> **>")
    parameters = replace(parameters, "double _Complex **" => "reinterpret_cast<std::complex<double> **>")
    parameters = replace(parameters, "short *" => "reinterpret_cast<sycl::half *>")
    parameters = replace(parameters, "float _Complex *" => "reinterpret_cast<std::complex<float> *>")
    parameters = replace(parameters, "double _Complex *" => "reinterpret_cast<std::complex<double> *>")
    parameters = replace(parameters, "short " => "sycl::bit_cast<sycl::half>")
    parameters = replace(parameters, "float _Complex " => "static_cast<std::complex<float> >")
    parameters = replace(parameters, "double _Complex " => "static_cast<std::complex<double> >")
    # Plain float/double parameters need no cast: drop the type, keep the name.
    parameters = replace(parameters, ", float *" => ", ")
    parameters = replace(parameters, ", double *" => ", ")
    parameters = replace(parameters, ", float " => ", ")
    parameters = replace(parameters, ", double " => ", ")
    # Remove pointer stars left in front of a name once its type has been stripped.
    parameters = replace(parameters, ", **" => ", ")
    parameters = replace(parameters, ", *" => ", ")
    # Pointer-typed enum parameters are converted element-wise with the two-argument
    # `convert(ptr, group_count)` overloads.
    parameters = replace(parameters, "onemklTranspose *trans," => "convert(trans, group_count),")
    parameters = replace(parameters, "onemklTranspose* trans," => "convert(trans, group_count),")
    parameters = replace(parameters, "onemklUplo *uplo," => "convert(uplo, group_count),")
    parameters = replace(parameters, "onemklUplo* uplo," => "convert(uplo, group_count),")
    parameters = replace(parameters, "onemklDiag *diag," => "convert(diag, group_count),")
    parameters = replace(parameters, "onemklDiag* diag," => "convert(diag, group_count),")
    parameters = replace(parameters, "onemklSide *side," => "convert(side, group_count),")
    parameters = replace(parameters, "onemklSide* side," => "convert(side, group_count),")

    # By-value enum parameters: `Type name` becomes `convert(name)`, both when followed by
    # a comma and when preceded by one (the latter also covers the last parameter).
    for type in ("onemklTranspose", "onemklSide", "onemklUplo", "onemklDiag", "onemklGenerate",
                 "onemklLayout", "onemklJob", "onemklJobsvd", "onemklCompz", "onemklRangev",
                 "onemklIndex", "onemklProperty", "onemklMatrixView", "onemklMatmatRequest",
                 "onemklOmatconvertAlg", "onemklOmataddAlg")
      parameters = replace(parameters, Regex("$type ([A-Za-z0-9_]+),") => SubstitutionString("convert(\\1),"))
      parameters = replace(parameters, Regex(", $type ([A-Za-z0-9_]+)") => SubstitutionString(", convert(\\1)"))
    end

    # Pass scalars (e.g. alpha/beta inputs) as references instead of values
    # In the emitted signature the "§" marker becomes a pointer star; in the argument list
    # a marked scalar is dereferenced (after a pointer cast for half/complex types).
    header = replace(header, "§" => "*")
    parameters = replace(parameters, ", §" => ", *")
    parameters = replace(parameters, ", sycl::bit_cast<sycl::half>§" => ", *reinterpret_cast<sycl::half *>")
    parameters = replace(parameters, ", static_cast<std::complex<float> >§" => ", *reinterpret_cast<std::complex<float> *>")
    parameters = replace(parameters, ", static_cast<std::complex<double> >§" => ", *reinterpret_cast<std::complex<double> *>")

    # Wrap the identifier that follows each cast in parentheses: `cast<T>name` => `cast<T>(name)`.
    parameters = replace(parameters, r"half>([A-Za-z0-9_]+)" => s"half>(\1)")
    parameters = replace(parameters, r" >([A-Za-z0-9_]+)" => s" >(\1)")
    parameters = replace(parameters, r" \*>([A-Za-z0-9_]+)" => s"*>(\1)")
    parameters = replace(parameters, r" \*\*>([A-Za-z0-9_]+)" => s"**>(\1)")

    # BLAS routines are called through the `column_major` sub-namespace.
    variant = ""
    if library == "blas"
      variant = "column_major::"
    end

    # Build catch clause: LAPACK functions also catch computation_error for info
    lapack_catch = "catch (const oneapi::mkl::lapack::computation_error& e) { return e.info(); } catch (const sycl::exception& e) { return -1; }"
    sycl_catch = "catch (const sycl::exception& e) { return -1; }"

    write(oneapi_cpp, "extern \"C\" $header {\n")
    if template
      # Templated routine: instantiate it with the C++ type registered for this version.
      type = version_types[version]
      if !occursin("scratchpad_size", name)
        catch_clause = library == "lapack" ? lapack_catch : sycl_catch
        write(oneapi_cpp, "   try {\n")
        write(oneapi_cpp, "      auto status = oneapi::mkl::$library::$variant$name<$type>($parameters, {});\n")
        write(oneapi_cpp, "      device_queue->val.wait_and_throw();\n")
        write(oneapi_cpp, "   } $catch_clause\n")
      end
      if occursin("scratchpad_size", name)
        # Scratchpad queries return a size and are emitted without a try/catch.
        write(oneapi_cpp, "   int64_t scratchpad_size = oneapi::mkl::$library::$variant$name<$type>($parameters);\n   device_queue->val.wait_and_throw();\n")
      end
    else
      if !(name ∈ void_output)
        has_queue = occursin("device_queue", parameters)
        is_scratchpad = occursin("scratchpad_size", name)
        if has_queue && !is_scratchpad
          catch_clause = library == "lapack" ? lapack_catch : sycl_catch
          write(oneapi_cpp, "   try {\n")
          write(oneapi_cpp, "      auto status = oneapi::mkl::$library::$variant$name($parameters, {});\n")
          write(oneapi_cpp, "      device_queue->val.wait_and_throw();\n")
          write(oneapi_cpp, "   } $catch_clause\n")
        else
          # NOTE(review): if a non-template routine whose name contains "scratchpad_size"
          # reaches this branch, the emitted body declares `status` but the code below
          # still emits `return scratchpad_size;` — confirm no such routine exists.
          write(oneapi_cpp, "   auto status = oneapi::mkl::$library::$variant$name($parameters, {});\n")
          if has_queue
            write(oneapi_cpp, "   device_queue->val.wait_and_throw();\n")
          end
        end
      else
        # Routines listed in `void_output`: the call is emitted without the trailing `{}`
        # argument and without capturing a result.
        if occursin("device_queue", parameters)
          write(oneapi_cpp, "   try {\n")
          write(oneapi_cpp, "      oneapi::mkl::$library::$variant$name($parameters);\n")
          write(oneapi_cpp, "      device_queue->val.wait_and_throw();\n")
          write(oneapi_cpp, "   } $sycl_catch\n")
        else
          write(oneapi_cpp, "   oneapi::mkl::$library::$variant$name($parameters);\n")
        end
      end
    end
    # Every wrapper returns 0 on the normal path, except scratchpad queries which
    # return the computed size.
    if occursin("scratchpad_size", name)
      write(oneapi_cpp, "   return scratchpad_size;\n")
    else
      write(oneapi_cpp, "   return 0;\n")
    end
    write(oneapi_cpp, "}")
    write(oneapi_cpp, "\n\n")
  end
  close(oneapi_cpp)
end

# Generate "src/onemkl.h"
# The per-library header fragments are produced first; scalars are marked with "*"
# so they appear as pointers in the C declarations.
generate_headers("blas", blas, "onemkl_blas.h", pattern="*")
generate_headers("lapack", lapack, "onemkl_lapack.h", pattern="*")
generate_headers("sparse", sparse, "onemkl_sparse.h", pattern="*")

# Read every fragment before touching the destination, so a missing fragment cannot
# leave a truncated "src/onemkl.h" behind with a dangling open handle.
headers_prologue = read("onemkl_prologue.h", String)
headers_blas = read("onemkl_blas.h", String)
headers_lapack = read("onemkl_lapack.h", String)
headers_sparse = read("onemkl_sparse.h", String)
headers_epilogue = read("onemkl_epilogue.h", String)

# Concatenate in the final order: prologue, BLAS, LAPACK, SPARSE, epilogue.
headers_onemkl = string(headers_prologue,
                        "// BLAS\n", headers_blas,
                        "// LAPACK\n", headers_lapack,
                        "// SPARSE\n", headers_sparse,
                        headers_epilogue)

# Add the version of oneMKL in src/onemkl.h
# The three constants are inserted right before the declaration of `onemkl_version`.
version_onemkl = pkgversion(oneAPI_Support_Headers_jll)
headers_onemkl = replace(headers_onemkl, "void onemkl_version" => "const int64_t ONEMKL_VERSION_MAJOR = $(version_onemkl.major);\nconst int64_t ONEMKL_VERSION_MINOR = $(version_onemkl.minor);\nconst int64_t ONEMKL_VERSION_PATCH = $(version_onemkl.patch);\nvoid onemkl_version")

# `write(path, string)` opens, writes and closes the file in one call.
write("src/onemkl.h", headers_onemkl)

# Generate "src/onemkl.cpp"
# Scalars are marked with "§" while the wrappers are generated; `generate_cpp` turns the
# marker into a pointer in the signature and a dereference in the call.
generate_cpp("blas", blas, "onemkl_blas.cpp", pattern="§")
generate_cpp("lapack", lapack, "onemkl_lapack.cpp", pattern="§")
generate_cpp("sparse", sparse, "onemkl_sparse.cpp", pattern="§")

# Read every fragment before touching the destination, so a missing fragment cannot
# leave a truncated "src/onemkl.cpp" behind with a dangling open handle.
cpp_prologue = read("onemkl_prologue.cpp", String)
cpp_blas = read("onemkl_blas.cpp", String)
cpp_lapack = read("onemkl_lapack.cpp", String)
cpp_sparse = read("onemkl_sparse.cpp", String)
cpp_epilogue = read("onemkl_epilogue.cpp", String)

# Concatenate in the final order and write the file in a single call, which opens,
# writes and closes it even if an error occurs while writing.
write("src/onemkl.cpp", string(cpp_prologue,
                               "// BLAS\n", cpp_blas,
                               "// LAPACK\n", cpp_lapack,
                               "// SPARSE\n", cpp_sparse,
                               cpp_epilogue))


================================================
FILE: deps/onemkl_epilogue.cpp
================================================
// Hand-written wrapper around oneapi::mkl::sparse::matmat.
// The opaque C handles are cast to their oneMKL types and `req` is translated with
// convert(). Returns 0 on success and -1 when a SYCL exception is raised, matching the
// wrappers emitted by the generator; without the try/catch an exception would escape
// through the extern "C" boundary.
extern "C" int onemklXsparse_matmat(syclQueue_t device_queue, matrix_handle_t A, matrix_handle_t B, matrix_handle_t C, onemklMatmatRequest req, matmat_descr_t descr, int64_t *sizeTempBuffer, void *tempBuffer) {
   try {
      auto status = oneapi::mkl::sparse::matmat(device_queue->val, (oneapi::mkl::sparse::matrix_handle_t) A, (oneapi::mkl::sparse::matrix_handle_t) B, (oneapi::mkl::sparse::matrix_handle_t) C, convert(req), (oneapi::mkl::sparse::matmat_descr_t) descr, sizeTempBuffer, tempBuffer, {});
      // Block until the submitted work has finished so asynchronous errors surface here.
      device_queue->val.wait_and_throw();
   } catch (const sycl::exception& e) { return -1; }
   return 0;
}

// other

// oneMKL keeps a cache of SYCL queues and tries to destroy them when the library is
// unloaded. That is incompatible with oneAPI.jl destroying its queues earlier, so
// mkl_free_buffers() is called to wipe the device cache manually whenever queues are
// being destroyed.

// Release oneMKL's internal buffers via mkl_free_buffers(). Always returns 0.
extern "C" int onemklDestroy() {
    mkl_free_buffers();
    return 0;
}


================================================
FILE: deps/onemkl_epilogue.h
================================================
// Hand-written wrapper for oneapi::mkl::sparse::matmat (defined in onemkl_epilogue.cpp).
int onemklXsparse_matmat(syclQueue_t device_queue, matrix_handle_t A, matrix_handle_t B,
                         matrix_handle_t C, onemklMatmatRequest req, matmat_descr_t
                         descr, int64_t *sizeTempBuffer, void *tempBuffer);

// Calls mkl_free_buffers() and returns 0 (defined in onemkl_epilogue.cpp).
int onemklDestroy(void);
#ifdef __cplusplus
}
#endif


================================================
FILE: deps/onemkl_prologue.cpp
================================================
#include "onemkl.h"
#include "sycl.hpp"

#include <exception>
#include <iostream>
#include <memory>
#include <stdexcept>

#include <oneapi/mkl.hpp>

// Translate a C `onemklTranspose` value into the matching `oneapi::mkl::transpose`.
// Throws std::invalid_argument for a value outside the enum, so control can never
// flow off the end of this value-returning function (undefined behavior).
oneapi::mkl::transpose convert(onemklTranspose val) {
    switch (val) {
    case ONEMKL_TRANSPOSE_NONTRANS:
        return oneapi::mkl::transpose::nontrans;
    case ONEMKL_TRANSPOSE_TRANS:
        return oneapi::mkl::transpose::trans;
    case ONEMLK_TRANSPOSE_CONJTRANS:
        return oneapi::mkl::transpose::conjtrans;
    }
    throw std::invalid_argument("convert: invalid onemklTranspose value");
}

// Translate an array of `size` C `onemklTranspose` values into a new array of
// `oneapi::mkl::transpose`.
// The result is allocated with new[] and returned as a raw pointer; nothing in this
// function releases it on the success path.
// Throws std::invalid_argument (after freeing the partial result) when an element is
// outside the enum, instead of leaving that slot uninitialized.
oneapi::mkl::transpose* convert(const onemklTranspose* vals, int64_t size) {
    oneapi::mkl::transpose* result = new oneapi::mkl::transpose[size];
    for (int64_t i = 0; i < size; ++i) {
        switch (vals[i]) {
            case ONEMKL_TRANSPOSE_NONTRANS:
                result[i] = oneapi::mkl::transpose::nontrans;
                break;
            case ONEMKL_TRANSPOSE_TRANS:
                result[i] = oneapi::mkl::transpose::trans;
                break;
            case ONEMLK_TRANSPOSE_CONJTRANS:
                result[i] = oneapi::mkl::transpose::conjtrans;
                break;
            default:
                delete[] result;
                throw std::invalid_argument("convert: invalid onemklTranspose value");
        }
    }
    return result;
}

// Translate a C `onemklUplo` value into the matching `oneapi::mkl::uplo`.
// Throws std::invalid_argument for a value outside the enum, so control can never
// flow off the end of this value-returning function (undefined behavior).
oneapi::mkl::uplo convert(onemklUplo val) {
    switch(val) {
        case ONEMKL_UPLO_UPPER:
            return oneapi::mkl::uplo::upper;
        case ONEMKL_UPLO_LOWER:
            return oneapi::mkl::uplo::lower;
    }
    throw std::invalid_argument("convert: invalid onemklUplo value");
}

// Translate an array of `size` C `onemklUplo` values into a new array of
// `oneapi::mkl::uplo`.
// The result is allocated with new[] and returned as a raw pointer; nothing in this
// function releases it on the success path.
// Throws std::invalid_argument (after freeing the partial result) when an element is
// outside the enum, instead of leaving that slot uninitialized.
oneapi::mkl::uplo* convert(const onemklUplo* vals, int64_t size) {
    oneapi::mkl::uplo* result = new oneapi::mkl::uplo[size];
    for (int64_t i = 0; i < size; ++i) {
        switch (vals[i]) {
            case ONEMKL_UPLO_UPPER:
                result[i] = oneapi::mkl::uplo::upper;
                break;
            case ONEMKL_UPLO_LOWER:
                result[i] = oneapi::mkl::uplo::lower;
                break;
            default:
                delete[] result;
                throw std::invalid_argument("convert: invalid onemklUplo value");
        }
    }
    return result;
}

// Translate a C `onemklDiag` value into the matching `oneapi::mkl::diag`.
// Throws std::invalid_argument for a value outside the enum, so control can never
// flow off the end of this value-returning function (undefined behavior).
oneapi::mkl::diag convert(onemklDiag val) {
    switch(val) {
        case ONEMKL_DIAG_NONUNIT:
            return oneapi::mkl::diag::nonunit;
        case ONEMKL_DIAG_UNIT:
            return oneapi::mkl::diag::unit;
    }
    throw std::invalid_argument("convert: invalid onemklDiag value");
}

// Translate an array of `size` C `onemklDiag` values into a new array of
// `oneapi::mkl::diag`.
// The result is allocated with new[] and returned as a raw pointer; nothing in this
// function releases it on the success path.
// Throws std::invalid_argument (after freeing the partial result) when an element is
// outside the enum, instead of leaving that slot uninitialized.
oneapi::mkl::diag* convert(const onemklDiag* vals, int64_t size) {
    oneapi::mkl::diag* result = new oneapi::mkl::diag[size];
    for (int64_t i = 0; i < size; ++i) {
        switch (vals[i]) {
            case ONEMKL_DIAG_NONUNIT:
                result[i] = oneapi::mkl::diag::nonunit;
                break;
            case ONEMKL_DIAG_UNIT:
                result[i] = oneapi::mkl::diag::unit;
                break;
            default:
                delete[] result;
                throw std::invalid_argument("convert: invalid onemklDiag value");
        }
    }
    return result;
}

// Translate a C `onemklSide` value into the matching `oneapi::mkl::side`.
// Throws std::invalid_argument for a value outside the enum, so control can never
// flow off the end of this value-returning function (undefined behavior).
oneapi::mkl::side convert(onemklSide val) {
    switch (val) {
    case ONEMKL_SIDE_LEFT:
        return oneapi::mkl::side::left;
    case ONEMKL_SIDE_RIGHT:
        return oneapi::mkl::side::right;
    }
    throw std::invalid_argument("convert: invalid onemklSide value");
}

// Translate an array of `size` C `onemklSide` values into a new array of
// `oneapi::mkl::side`.
// The result is allocated with new[] and returned as a raw pointer; nothing in this
// function releases it on the success path.
// Throws std::invalid_argument (after freeing the partial result) when an element is
// outside the enum, instead of leaving that slot uninitialized.
oneapi::mkl::side* convert(const onemklSide* vals, int64_t size) {
    oneapi::mkl::side* result = new oneapi::mkl::side[size];
    for (int64_t i = 0; i < size; ++i) {
        switch (vals[i]) {
            case ONEMKL_SIDE_LEFT:
                result[i] = oneapi::mkl::side::left;
                break;
            case ONEMKL_SIDE_RIGHT:
                result[i] = oneapi::mkl::side::right;
                break;
            default:
                delete[] result;
                throw std::invalid_argument("convert: invalid onemklSide value");
        }
    }
    return result;
}

// Translate a C `onemklOffset` value into the matching `oneapi::mkl::offset`.
// Throws std::invalid_argument for a value outside the enum, so control can never
// flow off the end of this value-returning function (undefined behavior).
oneapi::mkl::offset convert(onemklOffset val) {
    switch (val) {
    case ONEMKL_OFFSET_ROW:
        return oneapi::mkl::offset::row;
    case ONEMKL_OFFSET_COL:
        return oneapi::mkl::offset::column;
    case ONEMKL_OFFSET_FIX:
        return oneapi::mkl::offset::fix;
    }
    throw std::invalid_argument("convert: invalid onemklOffset value");
}

// Translate a C `onemklJob` value into the matching `oneapi::mkl::job`.
// Throws std::invalid_argument for a value outside the enum, so control can never
// flow off the end of this value-returning function (undefined behavior).
oneapi::mkl::job convert(onemklJob val) {
    switch (val) {
    case ONEMKL_JOB_N:
        return oneapi::mkl::job::N;
    case ONEMKL_JOB_V:
        return oneapi::mkl::job::V;
    case ONEMKL_JOB_U:
        return oneapi::mkl::job::U;
    case ONEMKL_JOB_A:
        return oneapi::mkl::job::A;
    case ONEMKL_JOB_S:
        return oneapi::mkl::job::S;
    case ONEMKL_JOB_O:
        return oneapi::mkl::job::O;
    }
    throw std::invalid_argument("convert: invalid onemklJob value");
}

// Translate a C `onemklGenerate` value into the matching `oneapi::mkl::generate`.
// Throws std::invalid_argument for a value outside the enum, so control can never
// flow off the end of this value-returning function (undefined behavior).
oneapi::mkl::generate convert(onemklGenerate val) {
    switch (val) {
    case ONEMKL_GENERATE_Q:
        return oneapi::mkl::generate::Q;
    case ONEMKL_GENERATE_P:
        return oneapi::mkl::generate::P;
    case ONEMKL_GENERATE_N:
        return oneapi::mkl::generate::N;
    case ONEMKL_GENERATE_V:
        return oneapi::mkl::generate::V;
    }
    throw std::invalid_argument("convert: invalid onemklGenerate value");
}

// Translate a C `onemklCompz` value into the matching `oneapi::mkl::compz`.
// Throws std::invalid_argument for a value outside the enum, so control can never
// flow off the end of this value-returning function (undefined behavior).
oneapi::mkl::compz convert(onemklCompz val) {
    switch (val) {
    case ONEMKL_COMPZ_N:
        return oneapi::mkl::compz::N;
    case ONEMKL_COMPZ_V:
        return oneapi::mkl::compz::V;
    case ONEMKL_COMPZ_I:
        return oneapi::mkl::compz::I;
    }
    throw std::invalid_argument("convert: invalid onemklCompz value");
}

// Translate a C `onemklDirect` value into the matching `oneapi::mkl::direct`.
// Throws std::invalid_argument for a value outside the enum, so control can never
// flow off the end of this value-returning function (undefined behavior).
oneapi::mkl::direct convert(onemklDirect val) {
    switch (val) {
    case ONEMKL_DIRECT_F:
        return oneapi::mkl::direct::F;
    case ONEMKL_DIRECT_B:
        return oneapi::mkl::direct::B;
    }
    throw std::invalid_argument("convert: invalid onemklDirect value");
}

// Translate a C `onemklStorev` value into the matching `oneapi::mkl::storev`.
// Throws std::invalid_argument for a value outside the enum, so control can never
// flow off the end of this value-returning function (undefined behavior).
oneapi::mkl::storev convert(onemklStorev val) {
    switch (val) {
    case ONEMKL_STOREV_C:
        return oneapi::mkl::storev::C;
    case ONEMKL_STOREV_R:
        return oneapi::mkl::storev::R;
    }
    throw std::invalid_argument("convert: invalid onemklStorev value");
}

// Translate a C `onemklRangev` value into the matching `oneapi::mkl::rangev`.
// Throws std::invalid_argument for a value outside the enum, so control can never
// flow off the end of this value-returning function (undefined behavior).
oneapi::mkl::rangev convert(onemklRangev val) {
    switch (val) {
    case ONEMKL_RANGEV_A:
        return oneapi::mkl::rangev::A;
    case ONEMKL_RANGEV_V:
        return oneapi::mkl::rangev::V;
    case ONEMKL_RANGEV_I:
        return oneapi::mkl::rangev::I;
    }
    throw std::invalid_argument("convert: invalid onemklRangev value");
}

// Translate a C `onemklOrder` value into the matching `oneapi::mkl::order`.
// Throws std::invalid_argument for a value outside the enum, so control can never
// flow off the end of this value-returning function (undefined behavior).
oneapi::mkl::order convert(onemklOrder val) {
    switch (val) {
    case ONEMKL_ORDER_B:
        return oneapi::mkl::order::B;
    case ONEMKL_ORDER_E:
        return oneapi::mkl::order::E;
    }
    throw std::invalid_argument("convert: invalid onemklOrder value");
}

// Translate a C `onemklJobsvd` value into the matching `oneapi::mkl::jobsvd`.
// Throws std::invalid_argument for a value outside the enum, so control can never
// flow off the end of this value-returning function (undefined behavior).
oneapi::mkl::jobsvd convert(onemklJobsvd val) {
    switch (val) {
    case ONEMKL_JOBSVD_N:
        return oneapi::mkl::jobsvd::N;
    case ONEMKL_JOBSVD_A:
        return oneapi::mkl::jobsvd::A;
    case ONEMKL_JOBSVD_O:
        return oneapi::mkl::jobsvd::O;
    case ONEMKL_JOBSVD_S:
        return oneapi::mkl::jobsvd::S;
    }
    throw std::invalid_argument("convert: invalid onemklJobsvd value");
}

// Translate a C `onemklLayout` value into the matching `oneapi::mkl::layout`.
// Throws std::invalid_argument for a value outside the enum, so control can never
// flow off the end of this value-returning function (undefined behavior).
oneapi::mkl::layout convert(onemklLayout val) {
    switch (val) {
    case ONEMKL_LAYOUT_ROW:
        return oneapi::mkl::layout::row_major;
    case ONEMKL_LAYOUT_COL:
        return oneapi::mkl::layout::col_major;
    }
    throw std::invalid_argument("convert: invalid onemklLayout value");
}

// Translate a C `onemklIndex` value into the matching `oneapi::mkl::index_base`.
// Throws std::invalid_argument for a value outside the enum, so control can never
// flow off the end of this value-returning function (undefined behavior).
oneapi::mkl::index_base convert(onemklIndex val) {
    switch (val) {
    case ONEMKL_INDEX_ZERO:
        return oneapi::mkl::index_base::zero;
    case ONEMKL_INDEX_ONE:
        return oneapi::mkl::index_base::one;
    }
    throw std::invalid_argument("convert: invalid onemklIndex value");
}

// Translate a C `onemklProperty` value into the matching `oneapi::mkl::sparse::property`.
// Throws std::invalid_argument for a value outside the enum, so control can never
// flow off the end of this value-returning function (undefined behavior).
oneapi::mkl::sparse::property convert(onemklProperty val) {
    switch (val) {
    case ONEMKL_PROPERTY_SYMMETRIC:
        return oneapi::mkl::sparse::property::symmetric;
    case ONEMKL_PROPERTY_SORTED:
        return oneapi::mkl::sparse::property::sorted;
    }
    throw std::invalid_argument("convert: invalid onemklProperty value");
}

// Translate a C `onemklMatrixView` value into the matching
// `oneapi::mkl::sparse::matrix_view_descr`.
// Throws std::invalid_argument for a value outside the enum, so control can never
// flow off the end of this value-returning function (undefined behavior).
oneapi::mkl::sparse::matrix_view_descr convert(onemklMatrixView val) {
    switch (val) {
    case ONEMKL_MATRIX_VIEW_GENERAL:
        return oneapi::mkl::sparse::matrix_view_descr::general;
    }
    throw std::invalid_argument("convert: invalid onemklMatrixView value");
}

// Translate a C `onemklMatmatRequest` value into the matching
// `oneapi::mkl::sparse::matmat_request`.
// Throws std::invalid_argument for a value outside the enum, so control can never
// flow off the end of this value-returning function (undefined behavior).
oneapi::mkl::sparse::matmat_request convert(onemklMatmatRequest val) {
    switch (val) {
    case ONEMKL_MATMAT_REQUEST_GET_WORK_ESTIMATION_BUF_SIZE:
        return oneapi::mkl::sparse::matmat_request::get_work_estimation_buf_size;
    case ONEMKL_MATMAT_REQUEST_WORK_ESTIMATION:
        return oneapi::mkl::sparse::matmat_request::work_estimation;
    case ONEMKL_MATMAT_REQUEST_GET_COMPUTE_STRUCTURE_BUF_SIZE:
        return oneapi::mkl::sparse::matmat_request::get_compute_structure_buf_size;
    case ONEMKL_MATMAT_REQUEST_COMPUTE_STRUCTURE:
        return oneapi::mkl::sparse::matmat_request::compute_structure;
    case ONEMKL_MATMAT_REQUEST_FINALIZE_STRUCTURE:
        return oneapi::mkl::sparse::matmat_request::finalize_structure;
    case ONEMKL_MATMAT_REQUEST_GET_COMPUTE_BUF_SIZE:
        return oneapi::mkl::sparse::matmat_request::get_compute_buf_size;
    case ONEMKL_MATMAT_REQUEST_COMPUTE:
        return oneapi::mkl::sparse::matmat_request::compute;
    case ONEMKL_MATMAT_REQUEST_GET_NNZ:
        return oneapi::mkl::sparse::matmat_request::get_nnz;
    case ONEMKL_MATMAT_REQUEST_FINALIZE:
        return oneapi::mkl::sparse::matmat_request::finalize;
    }
    throw std::invalid_argument("convert: invalid onemklMatmatRequest value");
}

// Translate a C `onemklOmatconvertAlg` value into the matching
// `oneapi::mkl::sparse::omatconvert_alg`.
// Throws std::invalid_argument for a value outside the enum, so control can never
// flow off the end of this value-returning function (undefined behavior).
oneapi::mkl::sparse::omatconvert_alg convert(onemklOmatconvertAlg val) {
    switch (val) {
    case ONEMKL_OMATCONVERT_DEFAULT_ALG:
        return oneapi::mkl::sparse::omatconvert_alg::default_alg;
    }
    throw std::invalid_argument("convert: invalid onemklOmatconvertAlg value");
}

// Translate a C `onemklOmataddAlg` value into the matching
// `oneapi::mkl::sparse::omatadd_alg`.
// Throws std::invalid_argument for a value outside the enum, so control can never
// flow off the end of this value-returning function (undefined behavior).
oneapi::mkl::sparse::omatadd_alg convert(onemklOmataddAlg val) {
    switch (val) {
    case ONEMKL_OMATADD_DEFAULT_ALG:
        return oneapi::mkl::sparse::omatadd_alg::default_alg;
    }
    throw std::invalid_argument("convert: invalid onemklOmataddAlg value");
}

// version
// Store the ONEMKL_VERSION_MAJOR / MINOR / PATCH constants into the three output
// pointers. The pointers are written unconditionally and are not checked for null.
extern "C" void onemkl_version(int64_t *major, int64_t *minor, int64_t *patch) {
    *patch = ONEMKL_VERSION_PATCH;
    *minor = ONEMKL_VERSION_MINOR;
    *major = ONEMKL_VERSION_MAJOR;
}

// gemm
// https://spec.oneapi.io/versions/1.0-rev-1/elements/oneMKL/source/domains/blas/gemm.html
// Owns the per-group transpose arrays that the grouped gemm_batch call takes.
// The C wrappers receive one transa/transb pair; this class replicates that pair
// `group_count` times into two arrays obtained from malloc_shared on the queue's
// device/context, and frees them in the destructor.
class gemmBatchInfo {
    public:
        oneapi::mkl::transpose *m_transa = nullptr;
        oneapi::mkl::transpose *m_transb = nullptr;
        sycl::device m_device;
        sycl::context m_context;
        oneapi::mkl::transpose m_ta;
        oneapi::mkl::transpose m_tb;

        // Constructor
        // Throws std::bad_alloc when group_count > 0 and an allocation yields nullptr
        // (the previous code printed a message and then dereferenced the null pointer).
        // Any exception raised while allocating is rethrown after partial allocations
        // have been released, since the destructor does not run for a failed constructor.
        gemmBatchInfo(syclQueue_t device_queue,
                    int64_t group_count,
                    onemklTranspose transa,
                    onemklTranspose transb) {
            // Get device and context info from device_queue
            auto main_queue = device_queue->val;
            m_device = main_queue.get_device();
            m_context = main_queue.get_context();

            m_ta = convert(transa);
            m_tb = convert(transb);

            // Allocate transpose shared buffers
            try {
                m_transa = (oneapi::mkl::transpose *) malloc_shared(group_count * sizeof(oneapi::mkl::transpose),
                                                                    m_device, m_context);
                m_transb = (oneapi::mkl::transpose *) malloc_shared(group_count * sizeof(oneapi::mkl::transpose),
                                                                    m_device, m_context);
            } catch (...) {
                release();
                throw;
            }

            // A null result is only an error when something was actually requested.
            if (group_count > 0 && (m_transa == nullptr || m_transb == nullptr)) {
                release();
                throw std::bad_alloc();
            }

            // Initialize; the index has the same width as group_count.
            for (int64_t i = 0; i < group_count; i++) {
                m_transa[i] = m_ta;
                m_transb[i] = m_tb;
            }
        };

        // The object owns raw allocations, so copying would free them twice.
        gemmBatchInfo(const gemmBatchInfo &) = delete;
        gemmBatchInfo &operator=(const gemmBatchInfo &) = delete;

        // Destructor
        ~gemmBatchInfo() {
            release();
        }

    private:
        // Free whichever arrays are currently allocated and reset the pointers.
        void release() {
            if (m_transa != nullptr) {
                free(m_transa, m_context);
                m_transa = nullptr;
            }
            if (m_transb != nullptr) {
                free(m_transb, m_context);
                m_transb = nullptr;
            }
        }
};

// Owns the per-group side/uplo/transpose/diag arrays that the grouped trsm_batch call
// takes. The C wrappers receive one value of each; this class replicates them
// `group_count` times into arrays obtained from malloc_shared on the queue's
// device/context, and frees them in the destructor.
class trsmBatchInfo {
    public:
        oneapi::mkl::transpose *m_transa = nullptr;
        oneapi::mkl::side *m_leftright = nullptr;
        oneapi::mkl::uplo *m_upperlower = nullptr;
        oneapi::mkl::diag *m_unitdiag = nullptr;
        sycl::device m_device;
        sycl::context m_context;
        oneapi::mkl::transpose m_ta;
        oneapi::mkl::side m_side;
        oneapi::mkl::uplo m_uplo;
        oneapi::mkl::diag m_diag;

        // Constructor
        // Throws std::bad_alloc when group_count > 0 and an allocation yields nullptr
        // (the previous code printed a message and then dereferenced the null pointer).
        // Any exception raised while allocating is rethrown after partial allocations
        // have been released, since the destructor does not run for a failed constructor.
        trsmBatchInfo(syclQueue_t device_queue,
                    onemklSide left_right,
                    onemklUplo upper_lower,
                    onemklTranspose transa,
                    onemklDiag unit_diag,
                    int64_t group_count) {
            // Get device and context info from device_queue
            auto main_queue = device_queue->val;
            m_device = main_queue.get_device();
            m_context = main_queue.get_context();

            m_ta = convert(transa);
            m_side = convert(left_right);
            m_uplo = convert(upper_lower);
            m_diag = convert(unit_diag);

            try {
                // Allocate one uniform array per trsm_batch option, group_count entries each
                m_transa = (oneapi::mkl::transpose *) malloc_shared(group_count * sizeof(oneapi::mkl::transpose),
                                                                    m_device, m_context);
                m_leftright = (oneapi::mkl::side *) malloc_shared(group_count * sizeof(oneapi::mkl::side),
                                                                m_device, m_context);
                m_upperlower = (oneapi::mkl::uplo *) malloc_shared(group_count * sizeof(oneapi::mkl::uplo),
                                                                m_device, m_context);
                m_unitdiag = (oneapi::mkl::diag *) malloc_shared(group_count * sizeof(oneapi::mkl::diag),
                                                                m_device, m_context);
            } catch (...) {
                release();
                throw;
            }

            // A null result is only an error when something was actually requested.
            if (group_count > 0 && (m_transa == nullptr || m_leftright == nullptr ||
                                    m_upperlower == nullptr || m_unitdiag == nullptr)) {
                release();
                throw std::bad_alloc();
            }

            // Initialize; the index has the same width as group_count.
            for (int64_t i = 0; i < group_count; i++) {
                m_transa[i] = m_ta;
                m_leftright[i] = m_side;
                m_upperlower[i] = m_uplo;
                m_unitdiag[i] = m_diag;
            }
        };

        // The object owns raw allocations, so copying would free them twice.
        trsmBatchInfo(const trsmBatchInfo &) = delete;
        trsmBatchInfo &operator=(const trsmBatchInfo &) = delete;

        // Destructor
        ~trsmBatchInfo() {
            release();
        }

    private:
        // Free whichever arrays are currently allocated and reset the pointers.
        void release() {
            if (m_transa != nullptr) {
                free(m_transa, m_context);
                m_transa = nullptr;
            }
            if (m_upperlower != nullptr) {
                free(m_upperlower, m_context);
                m_upperlower = nullptr;
            }
            if (m_unitdiag != nullptr) {
                free(m_unitdiag, m_context);
                m_unitdiag = nullptr;
            }
            if (m_leftright != nullptr) {
                free(m_leftright, m_context);
                m_leftright = nullptr;
            }
        }
};

// Grouped half-precision gemm_batch wrapper.
// The single transa/transb pair is replicated per group by gemmBatchInfo, and the
// half-precision pointers are reinterpreted as sycl::half.
// Returns 0 on success and -1 if any exception is raised, so that no C++ exception
// escapes through the extern "C" boundary (same -1 convention as the generated wrappers).
extern "C" int onemklHgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
                                 onemklTranspose transb, int64_t *m,
                                 int64_t *n, int64_t *k, uint16_t *alpha,
                                 const short **a, int64_t *lda, const short **b,
                                 int64_t *ldb, uint16_t *beta, short **c,
                                 int64_t *ldc, int64_t group_count, int64_t *group_size) {
    try {
        // gemmInfo must stay alive until the final wait below has returned.
        gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb);
        device_queue->val.wait_and_throw();
        auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val,
                            &gemmInfo.m_transa[0], &gemmInfo.m_transb[0],
                            m, n, k, reinterpret_cast<sycl::half *>(alpha),
                            reinterpret_cast<const sycl::half **>(&a[0]), lda,
                            reinterpret_cast<const sycl::half **>(&b[0]), ldb,
                            reinterpret_cast<sycl::half *>(beta), reinterpret_cast<sycl::half **>(&c[0]),
                            ldc, group_count, group_size, {});
        device_queue->val.wait_and_throw();
    } catch (const std::exception &) {
        return -1;
    }
    return 0;
}

// Grouped single-precision gemm_batch wrapper.
// The single transa/transb pair is replicated per group by gemmBatchInfo.
// Returns 0 on success and -1 if any exception is raised, so that no C++ exception
// escapes through the extern "C" boundary (same -1 convention as the generated wrappers).
extern "C" int onemklSgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
                                 onemklTranspose transb, int64_t *m,
                                 int64_t *n, int64_t *k, float *alpha,
                                 const float **a, int64_t *lda, const float **b,
                                 int64_t *ldb, float *beta, float **c,
                                 int64_t *ldc, int64_t group_count, int64_t *group_size) {
    try {
        // gemmInfo must stay alive until the final wait below has returned.
        gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb);
        device_queue->val.wait_and_throw();
        auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val,
                            &gemmInfo.m_transa[0], &gemmInfo.m_transb[0],
                            m, n, k, alpha,
                            (const float **)&a[0], lda,
                            (const float **)&b[0], ldb,
                            beta, &c[0], ldc,
                            group_count, group_size, {});
        device_queue->val.wait_and_throw();
    } catch (const std::exception &) {
        return -1;
    }
    return 0;
}

// Grouped double-precision gemm_batch wrapper.
// The single transa/transb pair is replicated per group by gemmBatchInfo.
// Returns 0 on success and -1 if any exception is raised, so that no C++ exception
// escapes through the extern "C" boundary (same -1 convention as the generated wrappers).
extern "C" int onemklDgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
                                 onemklTranspose transb, int64_t *m,
                                 int64_t *n, int64_t *k, double *alpha,
                                 const double **a, int64_t *lda, const double **b,
                                 int64_t *ldb, double *beta, double **c,
                                 int64_t *ldc, int64_t group_count, int64_t *group_size) {
    try {
        // gemmInfo must stay alive until the final wait below has returned.
        gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb);
        device_queue->val.wait_and_throw();
        auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val,
                            &gemmInfo.m_transa[0], &gemmInfo.m_transb[0],
                            m, n, k, alpha,
                            (const double **)&a[0], lda,
                            (const double **)&b[0], ldb,
                            beta, &c[0], ldc,
                            group_count, group_size, {});
        device_queue->val.wait_and_throw();
    } catch (const std::exception &) {
        return -1;
    }
    return 0;
}

// Grouped single-precision complex gemm_batch wrapper.
// The single transa/transb pair is replicated per group by gemmBatchInfo, and the C
// complex pointers are reinterpreted as std::complex<float>.
// Returns 0 on success and -1 if any exception is raised, so that no C++ exception
// escapes through the extern "C" boundary (same -1 convention as the generated wrappers).
extern "C" int onemklCgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
                                 onemklTranspose transb, int64_t *m,
                                 int64_t *n, int64_t *k, float _Complex *alpha,
                                 const float _Complex **a, int64_t *lda,
                                 const float _Complex **b,
                                 int64_t *ldb, float _Complex *beta, float _Complex **c,
                                 int64_t *ldc, int64_t group_count, int64_t *group_size) {
    try {
        // gemmInfo must stay alive until the final wait below has returned.
        gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb);
        device_queue->val.wait_and_throw();
        auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val,
                            &gemmInfo.m_transa[0], &gemmInfo.m_transb[0],
                            m, n, k, reinterpret_cast<std::complex<float> *>(alpha),
                            reinterpret_cast<const std::complex<float> **>(&a[0]),
                            lda,
                            reinterpret_cast<const std::complex<float> **>(&b[0]),
                            ldb,
                            reinterpret_cast<std::complex<float> *>(beta),
                            reinterpret_cast<std::complex<float> **>(&c[0]), ldc,
                            group_count, group_size, {});
        device_queue->val.wait_and_throw();
    } catch (const std::exception &) {
        return -1;
    }
    return 0;
}

// Grouped double-precision complex gemm_batch wrapper.
// The single transa/transb pair is replicated per group by gemmBatchInfo, and the C
// complex pointers are reinterpreted as std::complex<double>.
// Returns 0 on success and -1 if any exception is raised, so that no C++ exception
// escapes through the extern "C" boundary (same -1 convention as the generated wrappers).
extern "C" int onemklZgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
                                 onemklTranspose transb, int64_t *m,
                                 int64_t *n, int64_t *k, double _Complex *alpha,
                                 const double _Complex **a, int64_t *lda,
                                 const double _Complex **b,
                                 int64_t *ldb, double _Complex *beta,
                                 double _Complex **c,
                                 int64_t *ldc, int64_t group_count, int64_t *group_size) {
    try {
        // gemmInfo must stay alive until the final wait below has returned.
        gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb);
        device_queue->val.wait_and_throw();
        auto status = oneapi::mkl::blas::column_major::gemm_batch(device_queue->val,
                            &gemmInfo.m_transa[0], &gemmInfo.m_transb[0],
                            m, n, k, reinterpret_cast<std::complex<double> *>(alpha),
                            reinterpret_cast<const std::complex<double> **>(&a[0]),
                            lda,
                            reinterpret_cast<const std::complex<double> **>(&b[0]),
                            ldb,
                            reinterpret_cast<std::complex<double> *>(beta),
                            reinterpret_cast<std::complex<double> **>(&c[0]), ldc,
                            group_count, group_size, {});
        device_queue->val.wait_and_throw();
    } catch (const std::exception &) {
        return -1;
    }
    return 0;
}

// Grouped single-precision trsm_batch wrapper.
// The single side/uplo/trans/diag values are replicated per group by trsmBatchInfo.
// Returns 0 on success and -1 if any exception is raised, so that no C++ exception
// escapes through the extern "C" boundary (same -1 convention as the generated wrappers).
extern "C" int onemklStrsm_batch(syclQueue_t device_queue, onemklSide left_right,
                                 onemklUplo upper_lower, onemklTranspose transa,
                                 onemklDiag unit_diag, int64_t *m, int64_t *n, float *alpha,
                                 const float **a, int64_t *lda, float **b, int64_t *ldb,
                                 int64_t group_count, int64_t *group_size) {
    try {
        // trsmInfo must stay alive until the final wait below has returned.
        trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa,
                               unit_diag, group_count);
        device_queue->val.wait_and_throw();

        auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val,
                            &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0],
                            &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0],
                            m, n, alpha, (const float **)&a[0], lda,
                            &b[0], ldb, group_count, group_size, {});
        device_queue->val.wait_and_throw();
    } catch (const std::exception &) {
        return -1;
    }
    return 0;
}

// Grouped double-precision trsm_batch wrapper.
// The single side/uplo/trans/diag values are replicated per group by trsmBatchInfo.
// Returns 0 on success and -1 if any exception is raised, so that no C++ exception
// escapes through the extern "C" boundary (same -1 convention as the generated wrappers).
extern "C" int onemklDtrsm_batch(syclQueue_t device_queue, onemklSide left_right,
                                 onemklUplo upper_lower, onemklTranspose transa,
                                 onemklDiag unit_diag, int64_t *m, int64_t *n,
                                 double *alpha, const double **a, int64_t *lda,
                                 double **b, int64_t *ldb, int64_t group_count,
                                 int64_t *group_size) {
    try {
        // trsmInfo must stay alive until the final wait below has returned.
        trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa,
                                    unit_diag, group_count);
        device_queue->val.wait_and_throw();

        auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val,
                            &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0],
                            &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0],
                            m, n, alpha, (const double **)&a[0], lda, &b[0],
                            ldb, group_count, group_size, {});
        device_queue->val.wait_and_throw();
    } catch (const std::exception &) {
        return -1;
    }
    return 0;
}

// Grouped single-precision complex trsm_batch wrapper.
// The single side/uplo/trans/diag values are replicated per group by trsmBatchInfo, and
// the C complex pointers are reinterpreted as std::complex<float>.
// Returns 0 on success and -1 if any exception is raised, so that no C++ exception
// escapes through the extern "C" boundary (same -1 convention as the generated wrappers).
extern "C" int onemklCtrsm_batch(syclQueue_t device_queue, onemklSide left_right,
                                 onemklUplo upper_lower, onemklTranspose transa,
                                 onemklDiag unit_diag, int64_t *m, int64_t *n,
                                 float _Complex *alpha, const float _Complex **a,
                                 int64_t *lda, float _Complex **b, int64_t *ldb,
                                 int64_t group_count, int64_t *group_size) {
    try {
        // trsmInfo must stay alive until the final wait below has returned.
        trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa,
                                    unit_diag, group_count);
        device_queue->val.wait_and_throw();

        auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val,
                            &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0],
                            &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0],
                            m, n, reinterpret_cast<std::complex<float> *>(alpha),
                            reinterpret_cast<const std::complex<float> **>(&a[0]),
                            lda, reinterpret_cast<std::complex<float> **>(&b[0]),
                            ldb, group_count, group_size, {});
        device_queue->val.wait_and_throw();
    } catch (const std::exception &) {
        return -1;
    }
    return 0;
}

// Grouped double-precision complex trsm_batch wrapper.
// The single side/uplo/trans/diag values are replicated per group by trsmBatchInfo, and
// the C complex pointers are reinterpreted as std::complex<double>.
// Returns 0 on success and -1 if any exception is raised, so that no C++ exception
// escapes through the extern "C" boundary (same -1 convention as the generated wrappers).
extern "C" int onemklZtrsm_batch(syclQueue_t device_queue, onemklSide left_right,
                                 onemklUplo upper_lower, onemklTranspose transa,
                                 onemklDiag unit_diag, int64_t *m, int64_t *n,
                                 double _Complex *alpha, const double _Complex **a,
                                 int64_t *lda, double _Complex **b, int64_t *ldb,
                                 int64_t group_count, int64_t *group_size) {
    try {
        // trsmInfo must stay alive until the final wait below has returned.
        trsmBatchInfo trsmInfo(device_queue, left_right,
                                    upper_lower, transa, unit_diag, group_count);
        device_queue->val.wait_and_throw();

        auto status = oneapi::mkl::blas::column_major::trsm_batch(device_queue->val,
                            &trsmInfo.m_leftright[0], &trsmInfo.m_upperlower[0],
                            &trsmInfo.m_transa[0], &trsmInfo.m_unitdiag[0],
                            m, n, reinterpret_cast<std::complex<double> *>(alpha),
                            reinterpret_cast<const std::complex<double> **>(&a[0]),
                            lda, reinterpret_cast<std::complex<double> **>(&b[0]),
                            ldb, group_count, group_size, {});
        device_queue->val.wait_and_throw();
    } catch (const std::exception &) {
        return -1;
    }
    return 0;
}


================================================
FILE: deps/onemkl_prologue.h
================================================
#pragma once

#include "sycl.h"

#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// BLAS types
// Transposition mode; C-side counterpart of oneapi::mkl::transpose.
typedef enum {
    ONEMKL_TRANSPOSE_NONTRANS,
    ONEMKL_TRANSPOSE_TRANS,
    // Historical misspelling ("ONEMLK"); kept because existing code refers to it.
    ONEMLK_TRANSPOSE_CONJTRANS,
    // Correctly spelled alias with the same value as the name above.
    ONEMKL_TRANSPOSE_CONJTRANS = ONEMLK_TRANSPOSE_CONJTRANS
} onemklTranspose;

// Triangle selector; C-side counterpart of oneapi::mkl::uplo.
typedef enum {
    ONEMKL_UPLO_UPPER = 0,  // -> oneapi::mkl::uplo::upper
    ONEMKL_UPLO_LOWER = 1   // -> oneapi::mkl::uplo::lower
} onemklUplo;

// Diagonal kind; C-side counterpart of oneapi::mkl::diag.
typedef enum {
    ONEMKL_DIAG_NONUNIT = 0,  // -> oneapi::mkl::diag::nonunit
    ONEMKL_DIAG_UNIT = 1      // -> oneapi::mkl::diag::unit
} onemklDiag;

// Operand side; C-side counterpart of oneapi::mkl::side.
typedef enum {
    ONEMKL_SIDE_LEFT = 0,  // -> oneapi::mkl::side::left
    ONEMKL_SIDE_RIGHT = 1  // -> oneapi::mkl::side::right
} onemklSide;

// Offset kind; C-side counterpart of oneapi::mkl::offset.
typedef enum {
    ONEMKL_OFFSET_ROW = 0,  // -> oneapi::mkl::offset::row
    ONEMKL_OFFSET_COL = 1,  // -> oneapi::mkl::offset::column
    ONEMKL_OFFSET_FIX = 2,  // -> oneapi::mkl::offset::fix
} onemklOffset;

// LAPACK types
// Job selector; C-side counterpart of oneapi::mkl::job (same letter suffixes).
typedef enum {
    ONEMKL_JOB_N = 0,  // -> oneapi::mkl::job::N
    ONEMKL_JOB_V = 1,  // -> oneapi::mkl::job::V
    ONEMKL_JOB_U = 2,  // -> oneapi::mkl::job::U
    ONEMKL_JOB_A = 3,  // -> oneapi::mkl::job::A
    ONEMKL_JOB_S = 4,  // -> oneapi::mkl::job::S
    ONEMKL_JOB_O = 5   // -> oneapi::mkl::job::O
} onemklJob;

// Generate selector; C-side counterpart of oneapi::mkl::generate.
typedef enum {
    ONEMKL_GENERATE_Q = 0,  // -> oneapi::mkl::generate::Q
    ONEMKL_GENERATE_P = 1,  // -> oneapi::mkl::generate::P
    ONEMKL_GENERATE_N = 2,  // -> oneapi::mkl::generate::N
    ONEMKL_GENERATE_V = 3   // -> oneapi::mkl::generate::V
} onemklGenerate;

// Compz selector; C-side counterpart of oneapi::mkl::compz.
typedef enum {
    ONEMKL_COMPZ_N = 0,  // -> oneapi::mkl::compz::N
    ONEMKL_COMPZ_V = 1,  // -> oneapi::mkl::compz::V
    ONEMKL_COMPZ_I = 2   // -> oneapi::mkl::compz::I
} onemklCompz;

// Direction selector; C-side counterpart of oneapi::mkl::direct.
typedef enum {
    ONEMKL_DIRECT_F = 0,  // -> oneapi::mkl::direct::F
    ONEMKL_DIRECT_B = 1   // -> oneapi::mkl::direct::B
} onemklDirect;

// Storev selector; C-side counterpart of oneapi::mkl::storev.
typedef enum {
    ONEMKL_STOREV_C = 0,  // -> oneapi::mkl::storev::C
    ONEMKL_STOREV_R = 1   // -> oneapi::mkl::storev::R
} onemklStorev;

// Rangev selector; C-side counterpart of oneapi::mkl::rangev.
typedef enum {
    ONEMKL_RANGEV_A = 0,  // -> oneapi::mkl::rangev::A
    ONEMKL_RANGEV_V = 1,  // -> oneapi::mkl::rangev::V
    ONEMKL_RANGEV_I = 2   // -> oneapi::mkl::rangev::I
} onemklRangev;

// Order selector; C-side counterpart of oneapi::mkl::order.
typedef enum {
    ONEMKL_ORDER_B = 0,  // -> oneapi::mkl::order::B
    ONEMKL_ORDER_E = 1   // -> oneapi::mkl::order::E
} onemklOrder;

// SVD job selector; C-side counterpart of oneapi::mkl::jobsvd.
typedef enum {
    ONEMKL_JOBSVD_N = 0,  // -> oneapi::mkl::jobsvd::N
    ONEMKL_JOBSVD_A = 1,  // -> oneapi::mkl::jobsvd::A
    ONEMKL_JOBSVD_O = 2,  // -> oneapi::mkl::jobsvd::O
    ONEMKL_JOBSVD_S = 3   // -> oneapi::mkl::jobsvd::S
} onemklJobsvd;

// Storage layout; C-side counterpart of oneapi::mkl::layout.
typedef enum {
    ONEMKL_LAYOUT_ROW = 0,  // -> oneapi::mkl::layout::row_major
    ONEMKL_LAYOUT_COL = 1,  // -> oneapi::mkl::layout::col_major
} onemklLayout;

// Index base; C-side counterpart of oneapi::mkl::index_base.
typedef enum {
    ONEMKL_INDEX_ZERO = 0,  // -> oneapi::mkl::index_base::zero
    ONEMKL_INDEX_ONE = 1,   // -> oneapi::mkl::index_base::one
} onemklIndex;

// SPARSE types
// Sparse matrix property; C-side counterpart of oneapi::mkl::sparse::property.
typedef enum {
    ONEMKL_PROPERTY_SYMMETRIC = 0,  // -> oneapi::mkl::sparse::property::symmetric
    ONEMKL_PROPERTY_SORTED = 1,     // -> oneapi::mkl::sparse::property::sorted
} onemklProperty;

// Sparse matrix view; C-side counterpart of oneapi::mkl::sparse::matrix_view_descr.
typedef enum {
    ONEMKL_MATRIX_VIEW_GENERAL = 0,  // -> oneapi::mkl::sparse::matrix_view_descr::general
} onemklMatrixView;

// Sparse matmat stage request; C-side counterpart of
// oneapi::mkl::sparse::matmat_request (same names, lower-cased there).
typedef enum {
    ONEMKL_MATMAT_REQUEST_GET_WORK_ESTIMATION_BUF_SIZE = 0,
    ONEMKL_MATMAT_REQUEST_WORK_ESTIMATION = 1,
    ONEMKL_MATMAT_REQUEST_GET_COMPUTE_STRUCTURE_BUF_SIZE = 2,
    ONEMKL_MATMAT_REQUEST_COMPUTE_STRUCTURE = 3,
    ONEMKL_MATMAT_REQUEST_FINALIZE_STRUCTURE = 4,
    ONEMKL_MATMAT_REQUEST_GET_COMPUTE_BUF_SIZE = 5,
    ONEMKL_MATMAT_REQUEST_COMPUTE = 6,
    ONEMKL_MATMAT_REQUEST_GET_NNZ = 7,
    ONEMKL_MATMAT_REQUEST_FINALIZE = 8,
} onemklMatmatRequest;

// omatconvert algorithm; C-side counterpart of oneapi::mkl::sparse::omatconvert_alg.
typedef enum {
    ONEMKL_OMATCONVERT_DEFAULT_ALG = 0,  // -> omatconvert_alg::default_alg
} onemklOmatconvertAlg;

// omatadd algorithm; C-side counterpart of oneapi::mkl::sparse::omatadd_alg.
typedef enum {
    ONEMKL_OMATADD_DEFAULT_ALG = 0,  // -> omatadd_alg::default_alg
} onemklOmataddAlg;

// Opaque handle types. Each struct is only forward-declared here, so code that
// includes this header can store and pass the pointer typedefs but cannot look
// inside the objects they refer to.

// Handle to a sparse matrix object.
struct matrix_handle;
typedef struct matrix_handle *matrix_handle_t;

// Handle to a matmat descriptor.
struct matmat_descr;
typedef struct matmat_descr *matmat_descr_t;

// Handle to an omatconvert descriptor.
struct omatconvert_descr;
typedef struct omatconvert_descr *omatconvert_descr_t;

// Handle to an omatadd descriptor.
struct omatadd_descr;
typedef struct omatadd_descr *omatadd_descr_t;

// Writes the oneMKL version the library was compiled against
// (ONEMKL_VERSION_MAJOR / _MINOR / _PATCH) into *major, *minor and *patch.
void onemkl_version(int64_t *major, int64_t *minor, int64_t *patch);

// Batched GEMM on half-precision data (the implementation reinterprets alpha,
// beta and the matrix pointers as sycl::half). The single transa/transb pair is
// replicated for each of the group_count groups and the call is forwarded to
// oneapi::mkl::blas::column_major::gemm_batch; the function blocks until the
// queue has drained. Returns 0 on success.
int onemklHgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
                      onemklTranspose transb, int64_t *m,
                      int64_t *n, int64_t *k, uint16_t *alpha,
                      const short **a, int64_t *lda, const short **b,
                      int64_t *ldb, uint16_t *beta, short **c,
                      int64_t *ldc, int64_t group_count, int64_t *group_size);

// Batched single-precision GEMM. The single transa/transb pair is replicated
// for each of the group_count groups and the call is forwarded to
// oneapi::mkl::blas::column_major::gemm_batch; the function blocks until the
// queue has drained. Returns 0 on success.
int onemklSgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
                      onemklTranspose transb, int64_t *m,
                      int64_t *n, int64_t *k, float *alpha,
                      const float **a, int64_t *lda, const float **b,
                      int64_t *ldb, float *beta, float **c,
                      int64_t *ldc, int64_t group_count, int64_t *group_size);

// Batched double-precision GEMM. The single transa/transb pair is replicated
// for each of the group_count groups and the call is forwarded to
// oneapi::mkl::blas::column_major::gemm_batch; the function blocks until the
// queue has drained. Returns 0 on success.
int onemklDgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
                      onemklTranspose transb, int64_t *m,
                      int64_t *n, int64_t *k, double *alpha,
                      const double **a, int64_t *lda, const double **b,
                      int64_t *ldb, double *beta, double **c,
                      int64_t *ldc, int64_t group_count, int64_t *group_size);

// Batched single-precision complex GEMM (C `float _Complex` data is
// reinterpreted as std::complex<float> by the implementation). The single
// transa/transb pair is replicated for each of the group_count groups and the
// call is forwarded to oneapi::mkl::blas::column_major::gemm_batch; the
// function blocks until the queue has drained. Returns 0 on success.
int onemklCgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
                      onemklTranspose transb, int64_t *m,
                      int64_t *n, int64_t *k, float _Complex *alpha,
                      const float _Complex **a, int64_t *lda,
                      const float _Complex **b,
                      int64_t *ldb, float _Complex *beta,
                      float _Complex **c, int64_t *ldc,
                      int64_t group_count, int64_t *group_size);

// Batched double-precision complex GEMM (C `double _Complex` data is
// reinterpreted as std::complex<double> by the implementation). The single
// transa/transb pair is replicated for each of the group_count groups and the
// call is forwarded to oneapi::mkl::blas::column_major::gemm_batch; the
// function blocks until the queue has drained. Returns 0 on success.
int onemklZgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
                      onemklTranspose transb, int64_t *m,
                      int64_t *n, int64_t *k, double _Complex *alpha,
                      const double _Complex **a, int64_t *lda,
                      const double _Complex **b,
                      int64_t *ldb, double _Complex *beta,
                      double _Complex **c, int64_t *ldc,
                      int64_t group_count, int64_t *group_size);

// Batched single-precision triangular solve. The single side/uplo/trans/diag
// selection is replicated for each of the group_count groups and the call is
// forwarded to oneapi::mkl::blas::column_major::trsm_batch; the function blocks
// until the queue has drained. Returns 0 on success.
int onemklStrsm_batch(syclQueue_t device_queue, onemklSide left_right,
                      onemklUplo upper_lower, onemklTranspose transa,
                      onemklDiag unit_diag, int64_t *m, int64_t *n,
                      float *alpha, const float **a, int64_t *lda,
                      float **b, int64_t *ldb, int64_t group_count,
                      int64_t *group_size);

// Batched double-precision triangular solve. The single side/uplo/trans/diag
// selection is replicated for each of the group_count groups and the call is
// forwarded to oneapi::mkl::blas::column_major::trsm_batch; the function blocks
// until the queue has drained. Returns 0 on success.
int onemklDtrsm_batch(syclQueue_t device_queue, onemklSide left_right,
                      onemklUplo upper_lower, onemklTranspose transa,
                      onemklDiag unit_diag, int64_t *m, int64_t *n,
                      double *alpha, const double **a, int64_t *lda,
                      double **b, int64_t *ldb, int64_t group_count,
                      int64_t *group_size);

// Batched single-precision complex triangular solve (C `float _Complex` data is
// reinterpreted as std::complex<float> by the implementation). The single
// side/uplo/trans/diag selection is replicated for each of the group_count
// groups and the call is forwarded to
// oneapi::mkl::blas::column_major::trsm_batch; the function blocks until the
// queue has drained. Returns 0 on success.
int onemklCtrsm_batch(syclQueue_t device_queue, onemklSide left_right,
                      onemklUplo upper_lower, onemklTranspose transa,
                      onemklDiag unit_diag, int64_t *m, int64_t *n,
                      float _Complex *alpha, const float _Complex **a, int64_t *lda,
                      float _Complex **b, int64_t *ldb, int64_t group_count,
                      int64_t *group_size);

// Batched double-precision complex triangular solve (C `double _Complex` data
// is reinterpreted as std::complex<double> by the implementation). The single
// side/uplo/trans/diag selection is replicated for each of the group_count
// groups and the call is forwarded to
// oneapi::mkl::blas::column_major::trsm_batch; the function blocks until the
// queue has drained. Returns 0 on success.
int onemklZtrsm_batch(syclQueue_t device_queue, onemklSide left_right,
                      onemklUplo upper_lower, onemklTranspose transa,
                      onemklDiag unit_diag, int64_t *m, int64_t *n,
                      double _Complex *alpha, const double _Complex **a, int64_t *lda,
                      double _Complex **b, int64_t *ldb, int64_t group_count,
                      int64_t *group_size);


================================================
FILE: deps/src/onemkl.cpp
================================================
#include "onemkl.h"
#include "sycl.hpp"
#include <iostream>
#include <exception>
#include <memory>
#include <oneapi/mkl.hpp>

// Translate a C-ABI onemklTranspose tag into the matching oneapi::mkl::transpose.
//
// Throws sycl::exception (errc::invalid) for a value outside the enum; without
// that, control would run off the end of a value-returning function, which is
// undefined behaviour.
oneapi::mkl::transpose convert(onemklTranspose val) {
    switch (val) {
    case ONEMKL_TRANSPOSE_NONTRANS:
        return oneapi::mkl::transpose::nontrans;
    case ONEMKL_TRANSPOSE_TRANS:
        return oneapi::mkl::transpose::trans;
    case ONEMLK_TRANSPOSE_CONJTRANS:
        return oneapi::mkl::transpose::conjtrans;
    }
    throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
                          "convert: invalid onemklTranspose value");
}

// Convert an array of `size` onemklTranspose tags into a newly allocated array
// of oneapi::mkl::transpose values.
//
// Every element goes through the scalar convert(onemklTranspose) overload so the
// enum mapping exists in one place only and no element is left uninitialised.
// The array is allocated with new[]; ownership passes to the caller, who must
// release it with delete[]. While it is being filled the buffer is owned by a
// std::unique_ptr so it is not leaked should a conversion throw.
oneapi::mkl::transpose* convert(const onemklTranspose* vals, int64_t size) {
    std::unique_ptr<oneapi::mkl::transpose[]> result(new oneapi::mkl::transpose[size]);
    for (int64_t i = 0; i < size; ++i) {
        result[i] = convert(vals[i]);
    }
    return result.release();
}

// Translate a C-ABI onemklUplo tag into the matching oneapi::mkl::uplo.
//
// Throws sycl::exception (errc::invalid) for a value outside the enum rather
// than running off the end of the function (undefined behaviour).
oneapi::mkl::uplo convert(onemklUplo val) {
    switch (val) {
    case ONEMKL_UPLO_UPPER:
        return oneapi::mkl::uplo::upper;
    case ONEMKL_UPLO_LOWER:
        return oneapi::mkl::uplo::lower;
    }
    throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
                          "convert: invalid onemklUplo value");
}

// Convert an array of `size` onemklUplo tags into a newly allocated array of
// oneapi::mkl::uplo values.
//
// Every element goes through the scalar convert(onemklUplo) overload so the enum
// mapping exists in one place only and no element is left uninitialised. The
// array is allocated with new[]; ownership passes to the caller, who must
// release it with delete[]. While it is being filled the buffer is owned by a
// std::unique_ptr so it is not leaked should a conversion throw.
oneapi::mkl::uplo* convert(const onemklUplo* vals, int64_t size) {
    std::unique_ptr<oneapi::mkl::uplo[]> result(new oneapi::mkl::uplo[size]);
    for (int64_t i = 0; i < size; ++i) {
        result[i] = convert(vals[i]);
    }
    return result.release();
}

// Translate a C-ABI onemklDiag tag into the matching oneapi::mkl::diag.
//
// Throws sycl::exception (errc::invalid) for a value outside the enum rather
// than running off the end of the function (undefined behaviour).
oneapi::mkl::diag convert(onemklDiag val) {
    switch (val) {
    case ONEMKL_DIAG_NONUNIT:
        return oneapi::mkl::diag::nonunit;
    case ONEMKL_DIAG_UNIT:
        return oneapi::mkl::diag::unit;
    }
    throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
                          "convert: invalid onemklDiag value");
}

// Convert an array of `size` onemklDiag tags into a newly allocated array of
// oneapi::mkl::diag values.
//
// Every element goes through the scalar convert(onemklDiag) overload so the enum
// mapping exists in one place only and no element is left uninitialised. The
// array is allocated with new[]; ownership passes to the caller, who must
// release it with delete[]. While it is being filled the buffer is owned by a
// std::unique_ptr so it is not leaked should a conversion throw.
oneapi::mkl::diag* convert(const onemklDiag* vals, int64_t size) {
    std::unique_ptr<oneapi::mkl::diag[]> result(new oneapi::mkl::diag[size]);
    for (int64_t i = 0; i < size; ++i) {
        result[i] = convert(vals[i]);
    }
    return result.release();
}

// Translate a C-ABI onemklSide tag into the matching oneapi::mkl::side.
//
// Throws sycl::exception (errc::invalid) for a value outside the enum rather
// than running off the end of the function (undefined behaviour).
oneapi::mkl::side convert(onemklSide val) {
    switch (val) {
    case ONEMKL_SIDE_LEFT:
        return oneapi::mkl::side::left;
    case ONEMKL_SIDE_RIGHT:
        return oneapi::mkl::side::right;
    }
    throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
                          "convert: invalid onemklSide value");
}

// Convert an array of `size` onemklSide tags into a newly allocated array of
// oneapi::mkl::side values.
//
// Every element goes through the scalar convert(onemklSide) overload so the enum
// mapping exists in one place only and no element is left uninitialised. The
// array is allocated with new[]; ownership passes to the caller, who must
// release it with delete[]. While it is being filled the buffer is owned by a
// std::unique_ptr so it is not leaked should a conversion throw.
oneapi::mkl::side* convert(const onemklSide* vals, int64_t size) {
    std::unique_ptr<oneapi::mkl::side[]> result(new oneapi::mkl::side[size]);
    for (int64_t i = 0; i < size; ++i) {
        result[i] = convert(vals[i]);
    }
    return result.release();
}

// Translate a C-ABI onemklOffset tag into the matching oneapi::mkl::offset.
//
// Throws sycl::exception (errc::invalid) for a value outside the enum rather
// than running off the end of the function (undefined behaviour).
oneapi::mkl::offset convert(onemklOffset val) {
    switch (val) {
    case ONEMKL_OFFSET_ROW:
        return oneapi::mkl::offset::row;
    case ONEMKL_OFFSET_COL:
        return oneapi::mkl::offset::column;
    case ONEMKL_OFFSET_FIX:
        return oneapi::mkl::offset::fix;
    }
    throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
                          "convert: invalid onemklOffset value");
}

// Translate a C-ABI onemklJob tag into the matching oneapi::mkl::job.
//
// Throws sycl::exception (errc::invalid) for a value outside the enum rather
// than running off the end of the function (undefined behaviour).
oneapi::mkl::job convert(onemklJob val) {
    switch (val) {
    case ONEMKL_JOB_N:
        return oneapi::mkl::job::N;
    case ONEMKL_JOB_V:
        return oneapi::mkl::job::V;
    case ONEMKL_JOB_U:
        return oneapi::mkl::job::U;
    case ONEMKL_JOB_A:
        return oneapi::mkl::job::A;
    case ONEMKL_JOB_S:
        return oneapi::mkl::job::S;
    case ONEMKL_JOB_O:
        return oneapi::mkl::job::O;
    }
    throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
                          "convert: invalid onemklJob value");
}

// Translate a C-ABI onemklGenerate tag into the matching oneapi::mkl::generate.
//
// Throws sycl::exception (errc::invalid) for a value outside the enum rather
// than running off the end of the function (undefined behaviour).
oneapi::mkl::generate convert(onemklGenerate val) {
    switch (val) {
    case ONEMKL_GENERATE_Q:
        return oneapi::mkl::generate::Q;
    case ONEMKL_GENERATE_P:
        return oneapi::mkl::generate::P;
    case ONEMKL_GENERATE_N:
        return oneapi::mkl::generate::N;
    case ONEMKL_GENERATE_V:
        return oneapi::mkl::generate::V;
    }
    throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
                          "convert: invalid onemklGenerate value");
}

// Translate a C-ABI onemklCompz tag into the matching oneapi::mkl::compz.
//
// Throws sycl::exception (errc::invalid) for a value outside the enum rather
// than running off the end of the function (undefined behaviour).
oneapi::mkl::compz convert(onemklCompz val) {
    switch (val) {
    case ONEMKL_COMPZ_N:
        return oneapi::mkl::compz::N;
    case ONEMKL_COMPZ_V:
        return oneapi::mkl::compz::V;
    case ONEMKL_COMPZ_I:
        return oneapi::mkl::compz::I;
    }
    throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
                          "convert: invalid onemklCompz value");
}

// Translate a C-ABI onemklDirect tag into the matching oneapi::mkl::direct.
//
// Throws sycl::exception (errc::invalid) for a value outside the enum rather
// than running off the end of the function (undefined behaviour).
oneapi::mkl::direct convert(onemklDirect val) {
    switch (val) {
    case ONEMKL_DIRECT_F:
        return oneapi::mkl::direct::F;
    case ONEMKL_DIRECT_B:
        return oneapi::mkl::direct::B;
    }
    throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
                          "convert: invalid onemklDirect value");
}

// Translate a C-ABI onemklStorev tag into the matching oneapi::mkl::storev.
//
// Throws sycl::exception (errc::invalid) for a value outside the enum rather
// than running off the end of the function (undefined behaviour).
oneapi::mkl::storev convert(onemklStorev val) {
    switch (val) {
    case ONEMKL_STOREV_C:
        return oneapi::mkl::storev::C;
    case ONEMKL_STOREV_R:
        return oneapi::mkl::storev::R;
    }
    throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
                          "convert: invalid onemklStorev value");
}

// Translate a C-ABI onemklRangev tag into the matching oneapi::mkl::rangev.
//
// Throws sycl::exception (errc::invalid) for a value outside the enum rather
// than running off the end of the function (undefined behaviour).
oneapi::mkl::rangev convert(onemklRangev val) {
    switch (val) {
    case ONEMKL_RANGEV_A:
        return oneapi::mkl::rangev::A;
    case ONEMKL_RANGEV_V:
        return oneapi::mkl::rangev::V;
    case ONEMKL_RANGEV_I:
        return oneapi::mkl::rangev::I;
    }
    throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
                          "convert: invalid onemklRangev value");
}

// Translate a C-ABI onemklOrder tag into the matching oneapi::mkl::order.
//
// Throws sycl::exception (errc::invalid) for a value outside the enum rather
// than running off the end of the function (undefined behaviour).
oneapi::mkl::order convert(onemklOrder val) {
    switch (val) {
    case ONEMKL_ORDER_B:
        return oneapi::mkl::order::B;
    case ONEMKL_ORDER_E:
        return oneapi::mkl::order::E;
    }
    throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
                          "convert: invalid onemklOrder value");
}

// Translate a C-ABI onemklJobsvd tag into the matching oneapi::mkl::jobsvd.
//
// Throws sycl::exception (errc::invalid) for a value outside the enum rather
// than running off the end of the function (undefined behaviour).
oneapi::mkl::jobsvd convert(onemklJobsvd val) {
    switch (val) {
    case ONEMKL_JOBSVD_N:
        return oneapi::mkl::jobsvd::N;
    case ONEMKL_JOBSVD_A:
        return oneapi::mkl::jobsvd::A;
    case ONEMKL_JOBSVD_O:
        return oneapi::mkl::jobsvd::O;
    case ONEMKL_JOBSVD_S:
        return oneapi::mkl::jobsvd::S;
    }
    throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
                          "convert: invalid onemklJobsvd value");
}

// Translate a C-ABI onemklLayout tag into the matching oneapi::mkl::layout.
//
// Throws sycl::exception (errc::invalid) for a value outside the enum rather
// than running off the end of the function (undefined behaviour).
oneapi::mkl::layout convert(onemklLayout val) {
    switch (val) {
    case ONEMKL_LAYOUT_ROW:
        return oneapi::mkl::layout::row_major;
    case ONEMKL_LAYOUT_COL:
        return oneapi::mkl::layout::col_major;
    }
    throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
                          "convert: invalid onemklLayout value");
}

// Translate a C-ABI onemklIndex tag into the matching oneapi::mkl::index_base.
//
// Throws sycl::exception (errc::invalid) for a value outside the enum rather
// than running off the end of the function (undefined behaviour).
oneapi::mkl::index_base convert(onemklIndex val) {
    switch (val) {
    case ONEMKL_INDEX_ZERO:
        return oneapi::mkl::index_base::zero;
    case ONEMKL_INDEX_ONE:
        return oneapi::mkl::index_base::one;
    }
    throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
                          "convert: invalid onemklIndex value");
}

// Translate a C-ABI onemklProperty tag into the matching
// oneapi::mkl::sparse::property.
//
// Throws sycl::exception (errc::invalid) for a value outside the enum rather
// than running off the end of the function (undefined behaviour).
oneapi::mkl::sparse::property convert(onemklProperty val) {
    switch (val) {
    case ONEMKL_PROPERTY_SYMMETRIC:
        return oneapi::mkl::sparse::property::symmetric;
    case ONEMKL_PROPERTY_SORTED:
        return oneapi::mkl::sparse::property::sorted;
    }
    throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
                          "convert: invalid onemklProperty value");
}

// Translate a C-ABI onemklMatrixView tag into the matching
// oneapi::mkl::sparse::matrix_view_descr.
//
// Throws sycl::exception (errc::invalid) for a value outside the enum rather
// than running off the end of the function (undefined behaviour).
oneapi::mkl::sparse::matrix_view_descr convert(onemklMatrixView val) {
    switch (val) {
    case ONEMKL_MATRIX_VIEW_GENERAL:
        return oneapi::mkl::sparse::matrix_view_descr::general;
    }
    throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
                          "convert: invalid onemklMatrixView value");
}

// Translate a C-ABI onemklMatmatRequest tag into the matching
// oneapi::mkl::sparse::matmat_request.
//
// Throws sycl::exception (errc::invalid) for a value outside the enum rather
// than running off the end of the function (undefined behaviour).
oneapi::mkl::sparse::matmat_request convert(onemklMatmatRequest val) {
    switch (val) {
    case ONEMKL_MATMAT_REQUEST_GET_WORK_ESTIMATION_BUF_SIZE:
        return oneapi::mkl::sparse::matmat_request::get_work_estimation_buf_size;
    case ONEMKL_MATMAT_REQUEST_WORK_ESTIMATION:
        return oneapi::mkl::sparse::matmat_request::work_estimation;
    case ONEMKL_MATMAT_REQUEST_GET_COMPUTE_STRUCTURE_BUF_SIZE:
        return oneapi::mkl::sparse::matmat_request::get_compute_structure_buf_size;
    case ONEMKL_MATMAT_REQUEST_COMPUTE_STRUCTURE:
        return oneapi::mkl::sparse::matmat_request::compute_structure;
    case ONEMKL_MATMAT_REQUEST_FINALIZE_STRUCTURE:
        return oneapi::mkl::sparse::matmat_request::finalize_structure;
    case ONEMKL_MATMAT_REQUEST_GET_COMPUTE_BUF_SIZE:
        return oneapi::mkl::sparse::matmat_request::get_compute_buf_size;
    case ONEMKL_MATMAT_REQUEST_COMPUTE:
        return oneapi::mkl::sparse::matmat_request::compute;
    case ONEMKL_MATMAT_REQUEST_GET_NNZ:
        return oneapi::mkl::sparse::matmat_request::get_nnz;
    case ONEMKL_MATMAT_REQUEST_FINALIZE:
        return oneapi::mkl::sparse::matmat_request::finalize;
    }
    throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
                          "convert: invalid onemklMatmatRequest value");
}

// Translate a C-ABI onemklOmatconvertAlg tag into the matching
// oneapi::mkl::sparse::omatconvert_alg.
//
// Throws sycl::exception (errc::invalid) for a value outside the enum rather
// than running off the end of the function (undefined behaviour).
oneapi::mkl::sparse::omatconvert_alg convert(onemklOmatconvertAlg val) {
    switch (val) {
    case ONEMKL_OMATCONVERT_DEFAULT_ALG:
        return oneapi::mkl::sparse::omatconvert_alg::default_alg;
    }
    throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
                          "convert: invalid onemklOmatconvertAlg value");
}

// Translate a C-ABI onemklOmataddAlg tag into the matching
// oneapi::mkl::sparse::omatadd_alg.
//
// Throws sycl::exception (errc::invalid) for a value outside the enum rather
// than running off the end of the function (undefined behaviour).
oneapi::mkl::sparse::omatadd_alg convert(onemklOmataddAlg val) {
    switch (val) {
    case ONEMKL_OMATADD_DEFAULT_ALG:
        return oneapi::mkl::sparse::omatadd_alg::default_alg;
    }
    throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
                          "convert: invalid onemklOmataddAlg value");
}

// version
// Report the oneMKL version this library was compiled against by storing the
// ONEMKL_VERSION_* macros into the three out-parameters.
extern "C" void onemkl_version(int64_t *major, int64_t *minor, int64_t *patch) {
    const int64_t compiled_version[3] = {
        ONEMKL_VERSION_MAJOR,
        ONEMKL_VERSION_MINOR,
        ONEMKL_VERSION_PATCH,
    };
    *major = compiled_version[0];
    *minor = compiled_version[1];
    *patch = compiled_version[2];
}

// gemm
// https://spec.oneapi.io/versions/1.0-rev-1/elements/oneMKL/source/domains/blas/gemm.html
class gemmBatchInfo {
    public:
        oneapi::mkl::transpose *m_transa = nullptr;
        oneapi::mkl::transpose *m_transb = nullptr;
        sycl::device m_device;
        sycl::context m_context;
        oneapi::mkl::transpose m_ta;
        oneapi::mkl::transpose m_tb;
        // Constructor
        gemmBatchInfo(syclQueue_t device_queue,
                    int64_t group_count,
                    onemklTranspose transa,
                    onemklTranspose transb) {
            // Get device and context info from device_queue
            auto main_queue = device_queue->val;
            m_device = main_queue.get_device();
            m_context = main_queue.get_context();

            // Allocate transpose shared buffers
            try {
                m_transa = (oneapi::mkl::transpose *) malloc_shared(group_count * sizeof(oneapi::mkl::transpose),
                                                                    m_device, m_context);
                m_transb = (oneapi::mkl::transpose *) malloc_shared(group_count * sizeof(oneapi::mkl::transpose),
                                                                    m_device, m_context);
                m_ta = convert(transa);
                m_tb = convert(transb);
            } catch(const std::bad_alloc& e) {
                std::cerr << "Error: " << e.what() << std::endl;
            }

            // Initialize
            for (int i = 0; i < group_count; i++) {
                m_transa[i] = m_ta;
                m_transb[i] = m_tb;
            }
        };

        // Destructor
        ~gemmBatchInfo() {
            free(m_transa, m_context);
            free(m_transb, m_context);
        }
};

class trsmBatchInfo {
    public:
        oneapi::mkl::transpose *m_transa = nullptr;
        oneapi::mkl::side *m_leftright = nullptr;
        oneapi::mkl::uplo *m_upperlower = nullptr;
        oneapi::mkl::diag *m_unitdiag = nullptr;
        sycl::device m_device;
        sycl::context m_context;
        oneapi::mkl::transpose m_ta;
        oneapi::mkl::side m_side;
        oneapi::mkl::uplo m_uplo;
        oneapi::mkl::diag m_diag;

        // Constructor
        trsmBatchInfo(syclQueue_t device_queue,
                    onemklSide left_right,
                    onemklUplo upper_lower,
                    onemklTranspose transa,
                    onemklDiag unit_diag,
                    int64_t group_count) {
            // Get device and context info from device_queue
            auto main_queue = device_queue->val;
            m_device = main_queue.get_device();
            m_context = main_queue.get_context();
            try {
                // Allocate uniform arrays of group_size and transpose_a, transpose_b supporting oneMKL
                // gemm_batch API
                m_transa = (oneapi::mkl::transpose *) malloc_shared(group_count * sizeof(oneapi::mkl::transpose),
                                                                    m_device, m_context);
                m_leftright = (oneapi::mkl::side *) malloc_shared(group_count * sizeof(oneapi::mkl::side),
                                                                m_device, m_context);
                m_upperlower = (oneapi::mkl::uplo *) malloc_shared(group_count * sizeof(oneapi::mkl::uplo),
                                                                m_device, m_context);
                m_unitdiag = (oneapi::mkl::diag *) malloc_shared(group_count * sizeof(oneapi::mkl::diag),
                                                                m_device, m_context);
                m_ta = convert(transa);
                m_side = convert(left_right);
                m_uplo = convert(upper_lower);
                m_diag = convert(unit_diag);
            } catch(const std::bad_alloc& e) {
                std::cerr << "Error: " << e.what() << std::endl;
            }
            // Initialize
            for (int i = 0; i < group_count; i++) {
                m_transa[i] = m_ta;
                m_leftright[i] = m_side;
                m_upperlower[i] = m_uplo;
                m_unitdiag[i] = m_diag;
            }
        };

        // Destructor
        ~trsmBatchInfo() {
            free(m_transa, m_context);
            free(m_upperlower, m_context);
            free(m_unitdiag, m_context);
            free(m_leftright, m_context);
        }
};

// Batched GEMM on half-precision data (column-major).
//
// alpha/beta and the matrix pointers arrive as 16-bit integer types and are
// reinterpreted as sycl::half. The single transa/transb pair is replicated for
// each of the `group_count` groups by gemmBatchInfo before the call is
// forwarded to oneapi::mkl::blas::column_major::gemm_batch. The queue is
// drained before and after the call, so the function is blocking.
//
// Returns 0 on success and -1 if any exception was raised, matching the
// non-batched wrappers; no C++ exception may cross this C entry point.
extern "C" int onemklHgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
                                 onemklTranspose transb, int64_t *m,
                                 int64_t *n, int64_t *k, uint16_t *alpha,
                                 const short **a, int64_t *lda, const short **b,
                                 int64_t *ldb, uint16_t *beta, short **c,
                                 int64_t *ldc, int64_t group_count, int64_t *group_size) {
    try {
        gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb);
        device_queue->val.wait_and_throw();
        // The returned event is not needed: completion is awaited on the queue below.
        oneapi::mkl::blas::column_major::gemm_batch(device_queue->val,
                            gemmInfo.m_transa, gemmInfo.m_transb,
                            m, n, k, reinterpret_cast<sycl::half *>(alpha),
                            reinterpret_cast<const sycl::half **>(a), lda,
                            reinterpret_cast<const sycl::half **>(b), ldb,
                            reinterpret_cast<sycl::half *>(beta),
                            reinterpret_cast<sycl::half **>(c),
                            ldc, group_count, group_size, {});
        device_queue->val.wait_and_throw();
    } catch (const std::exception &) {
        return -1;
    }
    return 0;
}

// Batched single-precision GEMM (column-major).
//
// The single transa/transb pair is replicated for each of the `group_count`
// groups by gemmBatchInfo before the call is forwarded to
// oneapi::mkl::blas::column_major::gemm_batch. The queue is drained before and
// after the call, so the function is blocking.
//
// Returns 0 on success and -1 if any exception was raised, matching the
// non-batched wrappers; no C++ exception may cross this C entry point.
extern "C" int onemklSgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
                                 onemklTranspose transb, int64_t *m,
                                 int64_t *n, int64_t *k, float *alpha,
                                 const float **a, int64_t *lda, const float **b,
                                 int64_t *ldb, float *beta, float **c,
                                 int64_t *ldc, int64_t group_count, int64_t *group_size) {
    try {
        gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb);
        device_queue->val.wait_and_throw();
        // The returned event is not needed: completion is awaited on the queue below.
        oneapi::mkl::blas::column_major::gemm_batch(device_queue->val,
                            gemmInfo.m_transa, gemmInfo.m_transb,
                            m, n, k, alpha,
                            a, lda,
                            b, ldb,
                            beta, c, ldc,
                            group_count, group_size, {});
        device_queue->val.wait_and_throw();
    } catch (const std::exception &) {
        return -1;
    }
    return 0;
}

// Batched double-precision GEMM (column-major).
//
// The single transa/transb pair is replicated for each of the `group_count`
// groups by gemmBatchInfo before the call is forwarded to
// oneapi::mkl::blas::column_major::gemm_batch. The queue is drained before and
// after the call, so the function is blocking.
//
// Returns 0 on success and -1 if any exception was raised, matching the
// non-batched wrappers; no C++ exception may cross this C entry point.
extern "C" int onemklDgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
                                 onemklTranspose transb, int64_t *m,
                                 int64_t *n, int64_t *k, double *alpha,
                                 const double **a, int64_t *lda, const double **b,
                                 int64_t *ldb, double *beta, double **c,
                                 int64_t *ldc, int64_t group_count, int64_t *group_size) {
    try {
        gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb);
        device_queue->val.wait_and_throw();
        // The returned event is not needed: completion is awaited on the queue below.
        oneapi::mkl::blas::column_major::gemm_batch(device_queue->val,
                            gemmInfo.m_transa, gemmInfo.m_transb,
                            m, n, k, alpha,
                            a, lda,
                            b, ldb,
                            beta, c, ldc,
                            group_count, group_size, {});
        device_queue->val.wait_and_throw();
    } catch (const std::exception &) {
        return -1;
    }
    return 0;
}

// Batched single-precision complex GEMM (column-major).
//
// C `float _Complex` data is reinterpreted as std::complex<float>. The single
// transa/transb pair is replicated for each of the `group_count` groups by
// gemmBatchInfo before the call is forwarded to
// oneapi::mkl::blas::column_major::gemm_batch. The queue is drained before and
// after the call, so the function is blocking.
//
// Returns 0 on success and -1 if any exception was raised, matching the
// non-batched wrappers; no C++ exception may cross this C entry point.
extern "C" int onemklCgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
                                 onemklTranspose transb, int64_t *m,
                                 int64_t *n, int64_t *k, float _Complex *alpha,
                                 const float _Complex **a, int64_t *lda,
                                 const float _Complex **b,
                                 int64_t *ldb, float _Complex *beta, float _Complex **c,
                                 int64_t *ldc, int64_t group_count, int64_t *group_size) {
    try {
        gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb);
        device_queue->val.wait_and_throw();
        // The returned event is not needed: completion is awaited on the queue below.
        oneapi::mkl::blas::column_major::gemm_batch(device_queue->val,
                            gemmInfo.m_transa, gemmInfo.m_transb,
                            m, n, k, reinterpret_cast<std::complex<float> *>(alpha),
                            reinterpret_cast<const std::complex<float> **>(a),
                            lda,
                            reinterpret_cast<const std::complex<float> **>(b),
                            ldb,
                            reinterpret_cast<std::complex<float> *>(beta),
                            reinterpret_cast<std::complex<float> **>(c), ldc,
                            group_count, group_size, {});
        device_queue->val.wait_and_throw();
    } catch (const std::exception &) {
        return -1;
    }
    return 0;
}

// Batched double-precision complex GEMM (column-major).
//
// C `double _Complex` data is reinterpreted as std::complex<double>. The single
// transa/transb pair is replicated for each of the `group_count` groups by
// gemmBatchInfo before the call is forwarded to
// oneapi::mkl::blas::column_major::gemm_batch. The queue is drained before and
// after the call, so the function is blocking.
//
// Returns 0 on success and -1 if any exception was raised, matching the
// non-batched wrappers; no C++ exception may cross this C entry point.
extern "C" int onemklZgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
                                 onemklTranspose transb, int64_t *m,
                                 int64_t *n, int64_t *k, double _Complex *alpha,
                                 const double _Complex **a, int64_t *lda,
                                 const double _Complex **b,
                                 int64_t *ldb, double _Complex *beta,
                                 double _Complex **c,
                                 int64_t *ldc, int64_t group_count, int64_t *group_size) {
    try {
        gemmBatchInfo gemmInfo(device_queue, group_count, transa, transb);
        device_queue->val.wait_and_throw();
        // The returned event is not needed: completion is awaited on the queue below.
        oneapi::mkl::blas::column_major::gemm_batch(device_queue->val,
                            gemmInfo.m_transa, gemmInfo.m_transb,
                            m, n, k, reinterpret_cast<std::complex<double> *>(alpha),
                            reinterpret_cast<const std::complex<double> **>(a),
                            lda,
                            reinterpret_cast<const std::complex<double> **>(b),
                            ldb,
                            reinterpret_cast<std::complex<double> *>(beta),
                            reinterpret_cast<std::complex<double> **>(c), ldc,
                            group_count, group_size, {});
        device_queue->val.wait_and_throw();
    } catch (const std::exception &) {
        return -1;
    }
    return 0;
}

// Batched single-precision triangular solve (column-major).
//
// The single side/uplo/trans/diag selection is replicated for each of the
// `group_count` groups by trsmBatchInfo before the call is forwarded to
// oneapi::mkl::blas::column_major::trsm_batch. The queue is drained before and
// after the call, so the function is blocking.
//
// Returns 0 on success and -1 if any exception was raised, matching the
// non-batched wrappers; no C++ exception may cross this C entry point.
extern "C" int onemklStrsm_batch(syclQueue_t device_queue, onemklSide left_right,
                                 onemklUplo upper_lower, onemklTranspose transa,
                                 onemklDiag unit_diag, int64_t *m, int64_t *n, float *alpha,
                                 const float **a, int64_t *lda, float **b, int64_t *ldb,
                                 int64_t group_count, int64_t *group_size) {
    try {
        trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa,
                               unit_diag, group_count);
        device_queue->val.wait_and_throw();

        // The returned event is not needed: completion is awaited on the queue below.
        oneapi::mkl::blas::column_major::trsm_batch(device_queue->val,
                            trsmInfo.m_leftright, trsmInfo.m_upperlower,
                            trsmInfo.m_transa, trsmInfo.m_unitdiag,
                            m, n, alpha, a, lda,
                            b, ldb, group_count, group_size, {});
        device_queue->val.wait_and_throw();
    } catch (const std::exception &) {
        return -1;
    }
    return 0;
}

// Batched double-precision triangular solve (column-major).
//
// The single side/uplo/trans/diag selection is replicated for each of the
// `group_count` groups by trsmBatchInfo before the call is forwarded to
// oneapi::mkl::blas::column_major::trsm_batch. The queue is drained before and
// after the call, so the function is blocking.
//
// Returns 0 on success and -1 if any exception was raised, matching the
// non-batched wrappers; no C++ exception may cross this C entry point.
extern "C" int onemklDtrsm_batch(syclQueue_t device_queue, onemklSide left_right,
                                 onemklUplo upper_lower, onemklTranspose transa,
                                 onemklDiag unit_diag, int64_t *m, int64_t *n,
                                 double *alpha, const double **a, int64_t *lda,
                                 double **b, int64_t *ldb, int64_t group_count,
                                 int64_t *group_size) {
    try {
        trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa,
                               unit_diag, group_count);
        device_queue->val.wait_and_throw();

        // The returned event is not needed: completion is awaited on the queue below.
        oneapi::mkl::blas::column_major::trsm_batch(device_queue->val,
                            trsmInfo.m_leftright, trsmInfo.m_upperlower,
                            trsmInfo.m_transa, trsmInfo.m_unitdiag,
                            m, n, alpha, a, lda, b,
                            ldb, group_count, group_size, {});
        device_queue->val.wait_and_throw();
    } catch (const std::exception &) {
        return -1;
    }
    return 0;
}

// Batched single-precision complex triangular solve (column-major).
//
// C `float _Complex` data is reinterpreted as std::complex<float>. The single
// side/uplo/trans/diag selection is replicated for each of the `group_count`
// groups by trsmBatchInfo before the call is forwarded to
// oneapi::mkl::blas::column_major::trsm_batch. The queue is drained before and
// after the call, so the function is blocking.
//
// Returns 0 on success and -1 if any exception was raised, matching the
// non-batched wrappers; no C++ exception may cross this C entry point.
extern "C" int onemklCtrsm_batch(syclQueue_t device_queue, onemklSide left_right,
                                 onemklUplo upper_lower, onemklTranspose transa,
                                 onemklDiag unit_diag, int64_t *m, int64_t *n,
                                 float _Complex *alpha, const float _Complex **a,
                                 int64_t *lda, float _Complex **b, int64_t *ldb,
                                 int64_t group_count, int64_t *group_size) {
    try {
        trsmBatchInfo trsmInfo(device_queue, left_right, upper_lower, transa,
                               unit_diag, group_count);
        device_queue->val.wait_and_throw();

        // The returned event is not needed: completion is awaited on the queue below.
        oneapi::mkl::blas::column_major::trsm_batch(device_queue->val,
                            trsmInfo.m_leftright, trsmInfo.m_upperlower,
                            trsmInfo.m_transa, trsmInfo.m_unitdiag,
                            m, n, reinterpret_cast<std::complex<float> *>(alpha),
                            reinterpret_cast<const std::complex<float> **>(a),
                            lda, reinterpret_cast<std::complex<float> **>(b),
                            ldb, group_count, group_size, {});
        device_queue->val.wait_and_throw();
    } catch (const std::exception &) {
        return -1;
    }
    return 0;
}

// Batched double-precision complex triangular solve (column-major).
//
// C `double _Complex` data is reinterpreted as std::complex<double>. The single
// side/uplo/trans/diag selection is replicated for each of the `group_count`
// groups by trsmBatchInfo before the call is forwarded to
// oneapi::mkl::blas::column_major::trsm_batch. The queue is drained before and
// after the call, so the function is blocking.
//
// Returns 0 on success and -1 if any exception was raised, matching the
// non-batched wrappers; no C++ exception may cross this C entry point.
extern "C" int onemklZtrsm_batch(syclQueue_t device_queue, onemklSide left_right,
                                 onemklUplo upper_lower, onemklTranspose transa,
                                 onemklDiag unit_diag, int64_t *m, int64_t *n,
                                 double _Complex *alpha, const double _Complex **a,
                                 int64_t *lda, double _Complex **b, int64_t *ldb,
                                 int64_t group_count, int64_t *group_size) {
    try {
        trsmBatchInfo trsmInfo(device_queue, left_right,
                               upper_lower, transa, unit_diag, group_count);
        device_queue->val.wait_and_throw();

        // The returned event is not needed: completion is awaited on the queue below.
        oneapi::mkl::blas::column_major::trsm_batch(device_queue->val,
                            trsmInfo.m_leftright, trsmInfo.m_upperlower,
                            trsmInfo.m_transa, trsmInfo.m_unitdiag,
                            m, n, reinterpret_cast<std::complex<double> *>(alpha),
                            reinterpret_cast<const std::complex<double> **>(a),
                            lda, reinterpret_cast<std::complex<double> **>(b),
                            ldb, group_count, group_size, {});
        device_queue->val.wait_and_throw();
    } catch (const std::exception &) {
        return -1;
    }
    return 0;
}
// BLAS
// C-ABI wrapper around oneapi::mkl::blas::column_major::gemm for half
// precision. The short* arguments are reinterpreted as sycl::half*; alpha and
// beta are dereferenced on the host. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklHgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, short *alpha, short *a, int64_t lda, short *b, int64_t ldb, short *beta, short *c, int64_t ldc) {
   try {
      using half_t = sycl::half;
      oneapi::mkl::blas::column_major::gemm(
         device_queue->val, convert(transa), convert(transb), m, n, k,
         *reinterpret_cast<half_t*>(alpha),
         reinterpret_cast<half_t*>(a), lda,
         reinterpret_cast<half_t*>(b), ldb,
         *reinterpret_cast<half_t*>(beta),
         reinterpret_cast<half_t*>(c), ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::gemm for float.
// alpha and beta are dereferenced on the host; a, b and c are passed through
// unchanged. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklSgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, float *alpha, float *a, int64_t lda, float *b, int64_t ldb, float *beta, float *c, int64_t ldc) {
   try {
      oneapi::mkl::blas::column_major::gemm(
         device_queue->val, convert(transa), convert(transb), m, n, k,
         *alpha, a, lda, b, ldb, *beta, c, ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::gemm for double.
// alpha and beta are dereferenced on the host; a, b and c are passed through
// unchanged. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklDgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, double *alpha, double *a, int64_t lda, double *b, int64_t ldb, double *beta, double *c, int64_t ldc) {
   try {
      oneapi::mkl::blas::column_major::gemm(
         device_queue->val, convert(transa), convert(transb), m, n, k,
         *alpha, a, lda, b, ldb, *beta, c, ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::gemm for
// single-precision complex. The float _Complex pointers are reinterpreted as
// std::complex<float>*; alpha and beta are dereferenced on the host. The queue
// is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklCgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *beta, float _Complex *c, int64_t ldc) {
   try {
      using cplx = std::complex<float>;
      oneapi::mkl::blas::column_major::gemm(
         device_queue->val, convert(transa), convert(transb), m, n, k,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(b), ldb,
         *reinterpret_cast<cplx*>(beta),
         reinterpret_cast<cplx*>(c), ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::gemm for
// double-precision complex. The double _Complex pointers are reinterpreted as
// std::complex<double>*; alpha and beta are dereferenced on the host. The
// queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklZgemm(syclQueue_t device_queue, onemklTranspose transa, onemklTranspose transb, int64_t m, int64_t n, int64_t k, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *beta, double _Complex *c, int64_t ldc) {
   try {
      using cplx = std::complex<double>;
      oneapi::mkl::blas::column_major::gemm(
         device_queue->val, convert(transa), convert(transb), m, n, k,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(b), ldb,
         *reinterpret_cast<cplx*>(beta),
         reinterpret_cast<cplx*>(c), ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::symm for float.
// The side/uplo enums go through convert(); alpha and beta are dereferenced on
// the host. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklSsymm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *b, int64_t ldb, float *beta, float *c, int64_t ldc) {
   try {
      oneapi::mkl::blas::column_major::symm(
         device_queue->val, convert(left_right), convert(upper_lower), m, n,
         *alpha, a, lda, b, ldb, *beta, c, ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::symm for double.
// The side/uplo enums go through convert(); alpha and beta are dereferenced on
// the host. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklDsymm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *b, int64_t ldb, double *beta, double *c, int64_t ldc) {
   try {
      oneapi::mkl::blas::column_major::symm(
         device_queue->val, convert(left_right), convert(upper_lower), m, n,
         *alpha, a, lda, b, ldb, *beta, c, ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::symm for
// single-precision complex. The float _Complex pointers are reinterpreted as
// std::complex<float>*; alpha and beta are dereferenced on the host. The queue
// is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklCsymm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *beta, float _Complex *c, int64_t ldc) {
   try {
      using cplx = std::complex<float>;
      oneapi::mkl::blas::column_major::symm(
         device_queue->val, convert(left_right), convert(upper_lower), m, n,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(b), ldb,
         *reinterpret_cast<cplx*>(beta),
         reinterpret_cast<cplx*>(c), ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::symm for
// double-precision complex. The double _Complex pointers are reinterpreted as
// std::complex<double>*; alpha and beta are dereferenced on the host. The
// queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklZsymm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *beta, double _Complex *c, int64_t ldc) {
   try {
      using cplx = std::complex<double>;
      oneapi::mkl::blas::column_major::symm(
         device_queue->val, convert(left_right), convert(upper_lower), m, n,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(b), ldb,
         *reinterpret_cast<cplx*>(beta),
         reinterpret_cast<cplx*>(c), ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::hemm for
// single-precision complex. The float _Complex pointers are reinterpreted as
// std::complex<float>*; alpha and beta are dereferenced on the host. The queue
// is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklChemm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *beta, float _Complex *c, int64_t ldc) {
   try {
      using cplx = std::complex<float>;
      oneapi::mkl::blas::column_major::hemm(
         device_queue->val, convert(left_right), convert(upper_lower), m, n,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(b), ldb,
         *reinterpret_cast<cplx*>(beta),
         reinterpret_cast<cplx*>(c), ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::hemm for
// double-precision complex. The double _Complex pointers are reinterpreted as
// std::complex<double>*; alpha and beta are dereferenced on the host. The
// queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklZhemm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *beta, double _Complex *c, int64_t ldc) {
   try {
      using cplx = std::complex<double>;
      oneapi::mkl::blas::column_major::hemm(
         device_queue->val, convert(left_right), convert(upper_lower), m, n,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(b), ldb,
         *reinterpret_cast<cplx*>(beta),
         reinterpret_cast<cplx*>(c), ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::syrk for float.
// The uplo/trans enums go through convert(); alpha and beta are dereferenced
// on the host. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklSsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float *alpha, float *a, int64_t lda, float *beta, float *c, int64_t ldc) {
   try {
      oneapi::mkl::blas::column_major::syrk(
         device_queue->val, convert(upper_lower), convert(trans), n, k,
         *alpha, a, lda, *beta, c, ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::syrk for double.
// The uplo/trans enums go through convert(); alpha and beta are dereferenced
// on the host. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklDsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double *alpha, double *a, int64_t lda, double *beta, double *c, int64_t ldc) {
   try {
      oneapi::mkl::blas::column_major::syrk(
         device_queue->val, convert(upper_lower), convert(trans), n, k,
         *alpha, a, lda, *beta, c, ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::syrk for
// single-precision complex. The float _Complex pointers are reinterpreted as
// std::complex<float>*; alpha and beta are dereferenced on the host. The queue
// is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklCsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *beta, float _Complex *c, int64_t ldc) {
   try {
      using cplx = std::complex<float>;
      oneapi::mkl::blas::column_major::syrk(
         device_queue->val, convert(upper_lower), convert(trans), n, k,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         *reinterpret_cast<cplx*>(beta),
         reinterpret_cast<cplx*>(c), ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::syrk for
// double-precision complex. The double _Complex pointers are reinterpreted as
// std::complex<double>*; alpha and beta are dereferenced on the host. The
// queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklZsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *beta, double _Complex *c, int64_t ldc) {
   try {
      using cplx = std::complex<double>;
      oneapi::mkl::blas::column_major::syrk(
         device_queue->val, convert(upper_lower), convert(trans), n, k,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         *reinterpret_cast<cplx*>(beta),
         reinterpret_cast<cplx*>(c), ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::herk for
// single-precision complex matrices. alpha and beta are plain float scalars
// dereferenced on the host; a and c are reinterpreted as std::complex<float>*.
// The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklCherk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float *alpha, float _Complex *a, int64_t lda, float *beta, float _Complex *c, int64_t ldc) {
   try {
      using cplx = std::complex<float>;
      oneapi::mkl::blas::column_major::herk(
         device_queue->val, convert(upper_lower), convert(trans), n, k,
         *alpha, reinterpret_cast<cplx*>(a), lda,
         *beta, reinterpret_cast<cplx*>(c), ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::herk for
// double-precision complex matrices. alpha and beta are plain double scalars
// dereferenced on the host; a and c are reinterpreted as
// std::complex<double>*. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklZherk(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double *alpha, double _Complex *a, int64_t lda, double *beta, double _Complex *c, int64_t ldc) {
   try {
      using cplx = std::complex<double>;
      oneapi::mkl::blas::column_major::herk(
         device_queue->val, convert(upper_lower), convert(trans), n, k,
         *alpha, reinterpret_cast<cplx*>(a), lda,
         *beta, reinterpret_cast<cplx*>(c), ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::syr2k for float.
// The uplo/trans enums go through convert(); alpha and beta are dereferenced
// on the host. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklSsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float *alpha, float *a, int64_t lda, float *b, int64_t ldb, float *beta, float *c, int64_t ldc) {
   try {
      oneapi::mkl::blas::column_major::syr2k(
         device_queue->val, convert(upper_lower), convert(trans), n, k,
         *alpha, a, lda, b, ldb, *beta, c, ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::syr2k for double.
// The uplo/trans enums go through convert(); alpha and beta are dereferenced
// on the host. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklDsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double *alpha, double *a, int64_t lda, double *b, int64_t ldb, double *beta, double *c, int64_t ldc) {
   try {
      oneapi::mkl::blas::column_major::syr2k(
         device_queue->val, convert(upper_lower), convert(trans), n, k,
         *alpha, a, lda, b, ldb, *beta, c, ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::syr2k for
// single-precision complex. The float _Complex pointers are reinterpreted as
// std::complex<float>*; alpha and beta are dereferenced on the host. The queue
// is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklCsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *beta, float _Complex *c, int64_t ldc) {
   try {
      using cplx = std::complex<float>;
      oneapi::mkl::blas::column_major::syr2k(
         device_queue->val, convert(upper_lower), convert(trans), n, k,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(b), ldb,
         *reinterpret_cast<cplx*>(beta),
         reinterpret_cast<cplx*>(c), ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::syr2k for
// double-precision complex. The double _Complex pointers are reinterpreted as
// std::complex<double>*; alpha and beta are dereferenced on the host. The
// queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklZsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *beta, double _Complex *c, int64_t ldc) {
   try {
      using cplx = std::complex<double>;
      oneapi::mkl::blas::column_major::syr2k(
         device_queue->val, convert(upper_lower), convert(trans), n, k,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(b), ldb,
         *reinterpret_cast<cplx*>(beta),
         reinterpret_cast<cplx*>(c), ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::her2k for
// single-precision complex matrices. alpha is complex and beta is a plain
// float; both are dereferenced on the host. a, b and c are reinterpreted as
// std::complex<float>*. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklCher2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float *beta, float _Complex *c, int64_t ldc) {
   try {
      using cplx = std::complex<float>;
      oneapi::mkl::blas::column_major::her2k(
         device_queue->val, convert(upper_lower), convert(trans), n, k,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(b), ldb,
         *beta,
         reinterpret_cast<cplx*>(c), ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::her2k for
// double-precision complex matrices. alpha is complex and beta is a plain
// double; both are dereferenced on the host. a, b and c are reinterpreted as
// std::complex<double>*. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklZher2k(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, int64_t n, int64_t k, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double *beta, double _Complex *c, int64_t ldc) {
   try {
      using cplx = std::complex<double>;
      oneapi::mkl::blas::column_major::her2k(
         device_queue->val, convert(upper_lower), convert(trans), n, k,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(b), ldb,
         *beta,
         reinterpret_cast<cplx*>(c), ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::trmm for float
// (form without a separate output matrix). The four enums go through
// convert(); alpha is dereferenced on the host. The queue is waited on after
// the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklStrmm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *b, int64_t ldb) {
   try {
      oneapi::mkl::blas::column_major::trmm(
         device_queue->val, convert(left_right), convert(upper_lower),
         convert(trans), convert(unit_diag), m, n,
         *alpha, a, lda, b, ldb, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::trmm for double
// (form without a separate output matrix). The four enums go through
// convert(); alpha is dereferenced on the host. The queue is waited on after
// the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklDtrmm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *b, int64_t ldb) {
   try {
      oneapi::mkl::blas::column_major::trmm(
         device_queue->val, convert(left_right), convert(upper_lower),
         convert(trans), convert(unit_diag), m, n,
         *alpha, a, lda, b, ldb, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::trmm for
// single-precision complex (form without a separate output matrix). The
// float _Complex pointers are reinterpreted as std::complex<float>*; alpha is
// dereferenced on the host. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklCtrmm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb) {
   try {
      using cplx = std::complex<float>;
      oneapi::mkl::blas::column_major::trmm(
         device_queue->val, convert(left_right), convert(upper_lower),
         convert(trans), convert(unit_diag), m, n,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(b), ldb, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::trmm for
// double-precision complex (form without a separate output matrix). The
// double _Complex pointers are reinterpreted as std::complex<double>*; alpha
// is dereferenced on the host. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklZtrmm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb) {
   try {
      using cplx = std::complex<double>;
      oneapi::mkl::blas::column_major::trmm(
         device_queue->val, convert(left_right), convert(upper_lower),
         convert(trans), convert(unit_diag), m, n,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(b), ldb, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around the oneapi::mkl::blas::column_major::trmm overload
// that additionally takes beta and a c/ldc matrix, for float. alpha and beta
// are dereferenced on the host. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklStrmm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *b, int64_t ldb, float *beta, float *c, int64_t ldc) {
   try {
      oneapi::mkl::blas::column_major::trmm(
         device_queue->val, convert(left_right), convert(upper_lower),
         convert(trans), convert(unit_diag), m, n,
         *alpha, a, lda, b, ldb, *beta, c, ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around the oneapi::mkl::blas::column_major::trmm overload
// that additionally takes beta and a c/ldc matrix, for double. alpha and beta
// are dereferenced on the host. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklDtrmm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *b, int64_t ldb, double *beta, double *c, int64_t ldc) {
   try {
      oneapi::mkl::blas::column_major::trmm(
         device_queue->val, convert(left_right), convert(upper_lower),
         convert(trans), convert(unit_diag), m, n,
         *alpha, a, lda, b, ldb, *beta, c, ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around the oneapi::mkl::blas::column_major::trmm overload
// that additionally takes beta and a c/ldc matrix, for single-precision
// complex. The float _Complex pointers are reinterpreted as
// std::complex<float>*; alpha and beta are dereferenced on the host. The queue
// is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklCtrmm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *beta, float _Complex *c, int64_t ldc) {
   try {
      using cplx = std::complex<float>;
      oneapi::mkl::blas::column_major::trmm(
         device_queue->val, convert(left_right), convert(upper_lower),
         convert(trans), convert(unit_diag), m, n,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(b), ldb,
         *reinterpret_cast<cplx*>(beta),
         reinterpret_cast<cplx*>(c), ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around the oneapi::mkl::blas::column_major::trmm overload
// that additionally takes beta and a c/ldc matrix, for double-precision
// complex. The double _Complex pointers are reinterpreted as
// std::complex<double>*; alpha and beta are dereferenced on the host. The
// queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklZtrmm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *beta, double _Complex *c, int64_t ldc) {
   try {
      using cplx = std::complex<double>;
      oneapi::mkl::blas::column_major::trmm(
         device_queue->val, convert(left_right), convert(upper_lower),
         convert(trans), convert(unit_diag), m, n,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(b), ldb,
         *reinterpret_cast<cplx*>(beta),
         reinterpret_cast<cplx*>(c), ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::trsm for float
// (form without a separate output matrix). The four enums go through
// convert(); alpha is dereferenced on the host. The queue is waited on after
// the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklStrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *b, int64_t ldb) {
   try {
      oneapi::mkl::blas::column_major::trsm(
         device_queue->val, convert(left_right), convert(upper_lower),
         convert(trans), convert(unit_diag), m, n,
         *alpha, a, lda, b, ldb, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::trsm for double
// (form without a separate output matrix). The four enums go through
// convert(); alpha is dereferenced on the host. The queue is waited on after
// the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklDtrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *b, int64_t ldb) {
   try {
      oneapi::mkl::blas::column_major::trsm(
         device_queue->val, convert(left_right), convert(upper_lower),
         convert(trans), convert(unit_diag), m, n,
         *alpha, a, lda, b, ldb, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::trsm for
// single-precision complex (form without a separate output matrix). The
// float _Complex pointers are reinterpreted as std::complex<float>*; alpha is
// dereferenced on the host. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklCtrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb) {
   try {
      using cplx = std::complex<float>;
      oneapi::mkl::blas::column_major::trsm(
         device_queue->val, convert(left_right), convert(upper_lower),
         convert(trans), convert(unit_diag), m, n,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(b), ldb, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::trsm for
// double-precision complex (form without a separate output matrix). The
// double _Complex pointers are reinterpreted as std::complex<double>*; alpha
// is dereferenced on the host. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklZtrsm(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb) {
   try {
      using cplx = std::complex<double>;
      oneapi::mkl::blas::column_major::trsm(
         device_queue->val, convert(left_right), convert(upper_lower),
         convert(trans), convert(unit_diag), m, n,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(b), ldb, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around the oneapi::mkl::blas::column_major::trsm overload
// that additionally takes beta and a c/ldc matrix, for float. alpha and beta
// are dereferenced on the host. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklStrsm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *b, int64_t ldb, float *beta, float *c, int64_t ldc) {
   try {
      oneapi::mkl::blas::column_major::trsm(
         device_queue->val, convert(left_right), convert(upper_lower),
         convert(trans), convert(unit_diag), m, n,
         *alpha, a, lda, b, ldb, *beta, c, ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around the oneapi::mkl::blas::column_major::trsm overload
// that additionally takes beta and a c/ldc matrix, for double. alpha and beta
// are dereferenced on the host. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklDtrsm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *b, int64_t ldb, double *beta, double *c, int64_t ldc) {
   try {
      oneapi::mkl::blas::column_major::trsm(
         device_queue->val, convert(left_right), convert(upper_lower),
         convert(trans), convert(unit_diag), m, n,
         *alpha, a, lda, b, ldb, *beta, c, ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around the oneapi::mkl::blas::column_major::trsm overload
// that additionally takes beta and a c/ldc matrix, for single-precision
// complex. The float _Complex pointers are reinterpreted as
// std::complex<float>*; alpha and beta are dereferenced on the host. The queue
// is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklCtrsm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *b, int64_t ldb, float _Complex *beta, float _Complex *c, int64_t ldc) {
   try {
      using cplx = std::complex<float>;
      oneapi::mkl::blas::column_major::trsm(
         device_queue->val, convert(left_right), convert(upper_lower),
         convert(trans), convert(unit_diag), m, n,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(b), ldb,
         *reinterpret_cast<cplx*>(beta),
         reinterpret_cast<cplx*>(c), ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around the oneapi::mkl::blas::column_major::trsm overload
// that additionally takes beta and a c/ldc matrix, for double-precision
// complex. The double _Complex pointers are reinterpreted as
// std::complex<double>*; alpha and beta are dereferenced on the host. The
// queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklZtrsm_variant(syclQueue_t device_queue, onemklSide left_right, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *b, int64_t ldb, double _Complex *beta, double _Complex *c, int64_t ldc) {
   try {
      using cplx = std::complex<double>;
      oneapi::mkl::blas::column_major::trsm(
         device_queue->val, convert(left_right), convert(upper_lower),
         convert(trans), convert(unit_diag), m, n,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(b), ldb,
         *reinterpret_cast<cplx*>(beta),
         reinterpret_cast<cplx*>(c), ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::dgmm for float.
// The side enum goes through convert(); a, x and c are passed through
// unchanged. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklSdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, float *a, int64_t lda, float *x, int64_t incx, float *c, int64_t ldc) {
   try {
      oneapi::mkl::blas::column_major::dgmm(
         device_queue->val, convert(left_right), m, n,
         a, lda, x, incx, c, ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::dgmm for double.
// The side enum goes through convert(); a, x and c are passed through
// unchanged. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklDdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, double *a, int64_t lda, double *x, int64_t incx, double *c, int64_t ldc) {
   try {
      oneapi::mkl::blas::column_major::dgmm(
         device_queue->val, convert(left_right), m, n,
         a, lda, x, incx, c, ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::dgmm for
// single-precision complex. a, x and c are reinterpreted as
// std::complex<float>*. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklCdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex *c, int64_t ldc) {
   try {
      using cplx = std::complex<float>;
      oneapi::mkl::blas::column_major::dgmm(
         device_queue->val, convert(left_right), m, n,
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(x), incx,
         reinterpret_cast<cplx*>(c), ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::dgmm for
// double-precision complex. a, x and c are reinterpreted as
// std::complex<double>*. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklZdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t m, int64_t n, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex *c, int64_t ldc) {
   try {
      using cplx = std::complex<double>;
      oneapi::mkl::blas::column_major::dgmm(
         device_queue->val, convert(left_right), m, n,
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(x), incx,
         reinterpret_cast<cplx*>(c), ldc, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::gemv for float.
// The trans enum goes through convert(); alpha and beta are dereferenced on
// the host. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklSgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float *alpha, float *a, int64_t lda, float *x, int64_t incx, float *beta, float *y, int64_t incy) {
   try {
      oneapi::mkl::blas::column_major::gemv(
         device_queue->val, convert(trans), m, n,
         *alpha, a, lda, x, incx, *beta, y, incy, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::gemv for double.
// The trans enum goes through convert(); alpha and beta are dereferenced on
// the host. The queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklDgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double *alpha, double *a, int64_t lda, double *x, int64_t incx, double *beta, double *y, int64_t incy) {
   try {
      oneapi::mkl::blas::column_major::gemv(
         device_queue->val, convert(trans), m, n,
         *alpha, a, lda, x, incx, *beta, y, incy, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::gemv for
// single-precision complex. The float _Complex pointers are reinterpreted as
// std::complex<float>*; alpha and beta are dereferenced on the host. The queue
// is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklCgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex *beta, float _Complex *y, int64_t incy) {
   try {
      using cplx = std::complex<float>;
      oneapi::mkl::blas::column_major::gemv(
         device_queue->val, convert(trans), m, n,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(x), incx,
         *reinterpret_cast<cplx*>(beta),
         reinterpret_cast<cplx*>(y), incy, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::gemv for
// double-precision complex. The double _Complex pointers are reinterpreted as
// std::complex<double>*; alpha and beta are dereferenced on the host. The
// queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklZgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex *beta, double _Complex *y, int64_t incy) {
   try {
      using cplx = std::complex<double>;
      oneapi::mkl::blas::column_major::gemv(
         device_queue->val, convert(trans), m, n,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(x), incx,
         *reinterpret_cast<cplx*>(beta),
         reinterpret_cast<cplx*>(y), incy, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::gbmv for float.
// The trans enum goes through convert(); kl and ku are forwarded as-is; alpha
// and beta are dereferenced on the host. The queue is waited on after the
// call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklSgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, float *alpha, float *a, int64_t lda, float *x, int64_t incx, float *beta, float *y, int64_t incy) {
   try {
      oneapi::mkl::blas::column_major::gbmv(
         device_queue->val, convert(trans), m, n, kl, ku,
         *alpha, a, lda, x, incx, *beta, y, incy, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::gbmv for double.
// The trans enum goes through convert(); kl and ku are forwarded as-is; alpha
// and beta are dereferenced on the host. The queue is waited on after the
// call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklDgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, double *alpha, double *a, int64_t lda, double *x, int64_t incx, double *beta, double *y, int64_t incy) {
   try {
      oneapi::mkl::blas::column_major::gbmv(
         device_queue->val, convert(trans), m, n, kl, ku,
         *alpha, a, lda, x, incx, *beta, y, incy, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::gbmv for
// single-precision complex. The float _Complex pointers are reinterpreted as
// std::complex<float>*; alpha and beta are dereferenced on the host. The queue
// is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklCgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex *beta, float _Complex *y, int64_t incy) {
   try {
      using cplx = std::complex<float>;
      oneapi::mkl::blas::column_major::gbmv(
         device_queue->val, convert(trans), m, n, kl, ku,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(x), incx,
         *reinterpret_cast<cplx*>(beta),
         reinterpret_cast<cplx*>(y), incy, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C-ABI wrapper around oneapi::mkl::blas::column_major::gbmv for
// double-precision complex. The double _Complex pointers are reinterpreted as
// std::complex<double>*; alpha and beta are dereferenced on the host. The
// queue is waited on after the call.
// Returns 0 on success, -1 if any exception is thrown.
extern "C" int onemklZgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex *beta, double _Complex *y, int64_t incy) {
   try {
      using cplx = std::complex<double>;
      oneapi::mkl::blas::column_major::gbmv(
         device_queue->val, convert(trans), m, n, kl, ku,
         *reinterpret_cast<cplx*>(alpha),
         reinterpret_cast<cplx*>(a), lda,
         reinterpret_cast<cplx*>(x), incx,
         *reinterpret_cast<cplx*>(beta),
         reinterpret_cast<cplx*>(y), incy, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch-all rather than sycl::exception only: no exception type may
      // escape into the C caller.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::ger with float operands.
// alpha is dereferenced here before the call.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklSger(syclQueue_t device_queue, int64_t m, int64_t n, float *alpha, float *x, int64_t incx, float *y, int64_t incy, float *a, int64_t lda) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::ger(q, m, n, *alpha, x, incx, y, incy, a, lda, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::ger with double operands.
// alpha is dereferenced here before the call.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklDger(syclQueue_t device_queue, int64_t m, int64_t n, double *alpha, double *x, int64_t incx, double *y, int64_t incy, double *a, int64_t lda) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::ger(q, m, n, *alpha, x, incx, y, incy, a, lda, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::gerc, reinterpreting the
// float _Complex arguments as std::complex<float>. alpha is dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklCgerc(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a, int64_t lda) {
   try {
      using cplx = std::complex<float>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::gerc(q, m, n, *reinterpret_cast<cplx *>(alpha),
         reinterpret_cast<cplx *>(x), incx,
         reinterpret_cast<cplx *>(y), incy,
         reinterpret_cast<cplx *>(a), lda, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::gerc, reinterpreting the
// double _Complex arguments as std::complex<double>. alpha is dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklZgerc(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a, int64_t lda) {
   try {
      using cplx = std::complex<double>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::gerc(q, m, n, *reinterpret_cast<cplx *>(alpha),
         reinterpret_cast<cplx *>(x), incx,
         reinterpret_cast<cplx *>(y), incy,
         reinterpret_cast<cplx *>(a), lda, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::geru, reinterpreting the
// float _Complex arguments as std::complex<float>. alpha is dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklCgeru(syclQueue_t device_queue, int64_t m, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a, int64_t lda) {
   try {
      using cplx = std::complex<float>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::geru(q, m, n, *reinterpret_cast<cplx *>(alpha),
         reinterpret_cast<cplx *>(x), incx,
         reinterpret_cast<cplx *>(y), incy,
         reinterpret_cast<cplx *>(a), lda, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::geru, reinterpreting the
// double _Complex arguments as std::complex<double>. alpha is dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklZgeru(syclQueue_t device_queue, int64_t m, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a, int64_t lda) {
   try {
      using cplx = std::complex<double>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::geru(q, m, n, *reinterpret_cast<cplx *>(alpha),
         reinterpret_cast<cplx *>(x), incx,
         reinterpret_cast<cplx *>(y), incy,
         reinterpret_cast<cplx *>(a), lda, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::hbmv, reinterpreting the
// float _Complex arguments as std::complex<float>. alpha and beta are dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklChbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, int64_t k, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex *beta, float _Complex *y, int64_t incy) {
   try {
      using cplx = std::complex<float>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::hbmv(q, convert(upper_lower), n, k,
         *reinterpret_cast<cplx *>(alpha), reinterpret_cast<cplx *>(a), lda,
         reinterpret_cast<cplx *>(x), incx,
         *reinterpret_cast<cplx *>(beta), reinterpret_cast<cplx *>(y), incy, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::hbmv, reinterpreting the
// double _Complex arguments as std::complex<double>. alpha and beta are dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklZhbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, int64_t k, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex *beta, double _Complex *y, int64_t incy) {
   try {
      using cplx = std::complex<double>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::hbmv(q, convert(upper_lower), n, k,
         *reinterpret_cast<cplx *>(alpha), reinterpret_cast<cplx *>(a), lda,
         reinterpret_cast<cplx *>(x), incx,
         *reinterpret_cast<cplx *>(beta), reinterpret_cast<cplx *>(y), incy, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::hemv, reinterpreting the
// float _Complex arguments as std::complex<float>. alpha and beta are dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklChemv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex *beta, float _Complex *y, int64_t incy) {
   try {
      using cplx = std::complex<float>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::hemv(q, convert(upper_lower), n,
         *reinterpret_cast<cplx *>(alpha), reinterpret_cast<cplx *>(a), lda,
         reinterpret_cast<cplx *>(x), incx,
         *reinterpret_cast<cplx *>(beta), reinterpret_cast<cplx *>(y), incy, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::hemv, reinterpreting the
// double _Complex arguments as std::complex<double>. alpha and beta are dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklZhemv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex *beta, double _Complex *y, int64_t incy) {
   try {
      using cplx = std::complex<double>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::hemv(q, convert(upper_lower), n,
         *reinterpret_cast<cplx *>(alpha), reinterpret_cast<cplx *>(a), lda,
         reinterpret_cast<cplx *>(x), incx,
         *reinterpret_cast<cplx *>(beta), reinterpret_cast<cplx *>(y), incy, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::her, reinterpreting the
// float _Complex arguments as std::complex<float>. The float alpha is dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklCher(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float _Complex *x, int64_t incx, float _Complex *a, int64_t lda) {
   try {
      using cplx = std::complex<float>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::her(q, convert(upper_lower), n, *alpha,
         reinterpret_cast<cplx *>(x), incx,
         reinterpret_cast<cplx *>(a), lda, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::her, reinterpreting the
// double _Complex arguments as std::complex<double>. The double alpha is dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklZher(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double _Complex *x, int64_t incx, double _Complex *a, int64_t lda) {
   try {
      using cplx = std::complex<double>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::her(q, convert(upper_lower), n, *alpha,
         reinterpret_cast<cplx *>(x), incx,
         reinterpret_cast<cplx *>(a), lda, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::her2, reinterpreting the
// float _Complex arguments as std::complex<float>. alpha is dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklCher2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a, int64_t lda) {
   try {
      using cplx = std::complex<float>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::her2(q, convert(upper_lower), n, *reinterpret_cast<cplx *>(alpha),
         reinterpret_cast<cplx *>(x), incx,
         reinterpret_cast<cplx *>(y), incy,
         reinterpret_cast<cplx *>(a), lda, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::her2, reinterpreting the
// double _Complex arguments as std::complex<double>. alpha is dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklZher2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a, int64_t lda) {
   try {
      using cplx = std::complex<double>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::her2(q, convert(upper_lower), n, *reinterpret_cast<cplx *>(alpha),
         reinterpret_cast<cplx *>(x), incx,
         reinterpret_cast<cplx *>(y), incy,
         reinterpret_cast<cplx *>(a), lda, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::hpmv, reinterpreting the
// float _Complex arguments as std::complex<float>. alpha and beta are dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklChpmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex *alpha, float _Complex *a, float _Complex *x, int64_t incx, float _Complex *beta, float _Complex *y, int64_t incy) {
   try {
      using cplx = std::complex<float>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::hpmv(q, convert(upper_lower), n,
         *reinterpret_cast<cplx *>(alpha), reinterpret_cast<cplx *>(a),
         reinterpret_cast<cplx *>(x), incx,
         *reinterpret_cast<cplx *>(beta), reinterpret_cast<cplx *>(y), incy, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::hpmv, reinterpreting the
// double _Complex arguments as std::complex<double>. alpha and beta are dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklZhpmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex *alpha, double _Complex *a, double _Complex *x, int64_t incx, double _Complex *beta, double _Complex *y, int64_t incy) {
   try {
      using cplx = std::complex<double>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::hpmv(q, convert(upper_lower), n,
         *reinterpret_cast<cplx *>(alpha), reinterpret_cast<cplx *>(a),
         reinterpret_cast<cplx *>(x), incx,
         *reinterpret_cast<cplx *>(beta), reinterpret_cast<cplx *>(y), incy, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::hpr, reinterpreting the
// float _Complex arguments as std::complex<float>. The float alpha is dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklChpr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float _Complex *x, int64_t incx, float _Complex *a) {
   try {
      using cplx = std::complex<float>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::hpr(q, convert(upper_lower), n, *alpha,
         reinterpret_cast<cplx *>(x), incx,
         reinterpret_cast<cplx *>(a), {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::hpr, reinterpreting the
// double _Complex arguments as std::complex<double>. The double alpha is dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklZhpr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double _Complex *x, int64_t incx, double _Complex *a) {
   try {
      using cplx = std::complex<double>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::hpr(q, convert(upper_lower), n, *alpha,
         reinterpret_cast<cplx *>(x), incx,
         reinterpret_cast<cplx *>(a), {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::hpr2, reinterpreting the
// float _Complex arguments as std::complex<float>. alpha is dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklChpr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a) {
   try {
      using cplx = std::complex<float>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::hpr2(q, convert(upper_lower), n, *reinterpret_cast<cplx *>(alpha),
         reinterpret_cast<cplx *>(x), incx,
         reinterpret_cast<cplx *>(y), incy,
         reinterpret_cast<cplx *>(a), {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::hpr2, reinterpreting the
// double _Complex arguments as std::complex<double>. alpha is dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklZhpr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a) {
   try {
      using cplx = std::complex<double>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::hpr2(q, convert(upper_lower), n, *reinterpret_cast<cplx *>(alpha),
         reinterpret_cast<cplx *>(x), incx,
         reinterpret_cast<cplx *>(y), incy,
         reinterpret_cast<cplx *>(a), {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::sbmv with float operands.
// alpha and beta are dereferenced here before the call.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklSsbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, int64_t k, float *alpha, float *a, int64_t lda, float *x, int64_t incx, float *beta, float *y, int64_t incy) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::sbmv(q, convert(upper_lower), n, k, *alpha, a, lda, x, incx, *beta, y, incy, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::sbmv with double operands.
// alpha and beta are dereferenced here before the call.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklDsbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, int64_t k, double *alpha, double *a, int64_t lda, double *x, int64_t incx, double *beta, double *y, int64_t incy) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::sbmv(q, convert(upper_lower), n, k, *alpha, a, lda, x, incx, *beta, y, incy, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::symv with float operands.
// alpha and beta are dereferenced here before the call.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklSsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float *a, int64_t lda, float *x, int64_t incx, float *beta, float *y, int64_t incy) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::symv(q, convert(upper_lower), n, *alpha, a, lda, x, incx, *beta, y, incy, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::symv with double operands.
// alpha and beta are dereferenced here before the call.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklDsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double *a, int64_t lda, double *x, int64_t incx, double *beta, double *y, int64_t incy) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::symv(q, convert(upper_lower), n, *alpha, a, lda, x, incx, *beta, y, incy, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::symv, reinterpreting the
// float _Complex arguments as std::complex<float>. alpha and beta are dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklCsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex *alpha, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx, float _Complex *beta, float _Complex *y, int64_t incy) {
   try {
      using cplx = std::complex<float>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::symv(q, convert(upper_lower), n,
         *reinterpret_cast<cplx *>(alpha), reinterpret_cast<cplx *>(a), lda,
         reinterpret_cast<cplx *>(x), incx,
         *reinterpret_cast<cplx *>(beta), reinterpret_cast<cplx *>(y), incy, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::symv, reinterpreting the
// double _Complex arguments as std::complex<double>. alpha and beta are dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklZsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex *alpha, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx, double _Complex *beta, double _Complex *y, int64_t incy) {
   try {
      using cplx = std::complex<double>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::symv(q, convert(upper_lower), n,
         *reinterpret_cast<cplx *>(alpha), reinterpret_cast<cplx *>(a), lda,
         reinterpret_cast<cplx *>(x), incx,
         *reinterpret_cast<cplx *>(beta), reinterpret_cast<cplx *>(y), incy, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::syr with float operands.
// alpha is dereferenced here before the call.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklSsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float *x, int64_t incx, float *a, int64_t lda) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::syr(q, convert(upper_lower), n, *alpha, x, incx, a, lda, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::syr with double operands.
// alpha is dereferenced here before the call.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklDsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double *x, int64_t incx, double *a, int64_t lda) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::syr(q, convert(upper_lower), n, *alpha, x, incx, a, lda, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::syr, reinterpreting the
// float _Complex arguments as std::complex<float>. alpha is dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklCsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *a, int64_t lda) {
   try {
      using cplx = std::complex<float>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::syr(q, convert(upper_lower), n, *reinterpret_cast<cplx *>(alpha),
         reinterpret_cast<cplx *>(x), incx,
         reinterpret_cast<cplx *>(a), lda, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::syr, reinterpreting the
// double _Complex arguments as std::complex<double>. alpha is dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklZsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *a, int64_t lda) {
   try {
      using cplx = std::complex<double>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::syr(q, convert(upper_lower), n, *reinterpret_cast<cplx *>(alpha),
         reinterpret_cast<cplx *>(x), incx,
         reinterpret_cast<cplx *>(a), lda, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::syr2 with float operands.
// alpha is dereferenced here before the call.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklSsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float *x, int64_t incx, float *y, int64_t incy, float *a, int64_t lda) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::syr2(q, convert(upper_lower), n, *alpha, x, incx, y, incy, a, lda, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::syr2 with double operands.
// alpha is dereferenced here before the call.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklDsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double *x, int64_t incx, double *y, int64_t incy, double *a, int64_t lda) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::syr2(q, convert(upper_lower), n, *alpha, x, incx, y, incy, a, lda, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::syr2, reinterpreting the
// float _Complex arguments as std::complex<float>. alpha is dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklCsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float _Complex *alpha, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *a, int64_t lda) {
   try {
      using cplx = std::complex<float>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::syr2(q, convert(upper_lower), n, *reinterpret_cast<cplx *>(alpha),
         reinterpret_cast<cplx *>(x), incx,
         reinterpret_cast<cplx *>(y), incy,
         reinterpret_cast<cplx *>(a), lda, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::syr2, reinterpreting the
// double _Complex arguments as std::complex<double>. alpha is dereferenced here.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklZsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double _Complex *alpha, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *a, int64_t lda) {
   try {
      using cplx = std::complex<double>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::syr2(q, convert(upper_lower), n, *reinterpret_cast<cplx *>(alpha),
         reinterpret_cast<cplx *>(x), incx,
         reinterpret_cast<cplx *>(y), incy,
         reinterpret_cast<cplx *>(a), lda, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::spmv with float operands.
// alpha and beta are dereferenced here before the call.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklSspmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float *a, float *x, int64_t incx, float *beta, float *y, int64_t incy) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::spmv(q, convert(upper_lower), n, *alpha, a, x, incx, *beta, y, incy, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::spmv with double operands.
// alpha and beta are dereferenced here before the call.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklDspmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double *a, double *x, int64_t incx, double *beta, double *y, int64_t incy) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::spmv(q, convert(upper_lower), n, *alpha, a, x, incx, *beta, y, incy, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::spr with float operands.
// alpha is dereferenced here before the call.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklSspr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float *x, int64_t incx, float *a) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::spr(q, convert(upper_lower), n, *alpha, x, incx, a, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::spr with double operands.
// alpha is dereferenced here before the call.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklDspr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double *x, int64_t incx, double *a) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::spr(q, convert(upper_lower), n, *alpha, x, incx, a, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::spr2 with float operands.
// alpha is dereferenced here before the call.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklSspr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, float *alpha, float *x, int64_t incx, float *y, int64_t incy, float *a) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::spr2(q, convert(upper_lower), n, *alpha, x, incx, y, incy, a, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::spr2 with double operands.
// alpha is dereferenced here before the call.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklDspr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_t n, double *alpha, double *x, int64_t incx, double *y, int64_t incy, double *a) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::spr2(q, convert(upper_lower), n, *alpha, x, incx, y, incy, a, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::tbmv with float operands.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklStbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, float *a, int64_t lda, float *x, int64_t incx) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::tbmv(q, convert(upper_lower), convert(trans), convert(unit_diag), n, k, a, lda, x, incx, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::tbmv with double operands.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklDtbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, double *a, int64_t lda, double *x, int64_t incx) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::tbmv(q, convert(upper_lower), convert(trans), convert(unit_diag), n, k, a, lda, x, incx, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::tbmv, reinterpreting the
// float _Complex arguments as std::complex<float>.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklCtbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx) {
   try {
      using cplx = std::complex<float>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::tbmv(q, convert(upper_lower), convert(trans), convert(unit_diag), n, k,
         reinterpret_cast<cplx *>(a), lda,
         reinterpret_cast<cplx *>(x), incx, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::tbmv, reinterpreting the
// double _Complex arguments as std::complex<double>.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklZtbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx) {
   try {
      using cplx = std::complex<double>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::tbmv(q, convert(upper_lower), convert(trans), convert(unit_diag), n, k,
         reinterpret_cast<cplx *>(a), lda,
         reinterpret_cast<cplx *>(x), incx, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::tbsv with float operands.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklStbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, float *a, int64_t lda, float *x, int64_t incx) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::tbsv(q, convert(upper_lower), convert(trans), convert(unit_diag), n, k, a, lda, x, incx, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::tbsv with double operands.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklDtbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, double *a, int64_t lda, double *x, int64_t incx) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::tbsv(q, convert(upper_lower), convert(trans), convert(unit_diag), n, k, a, lda, x, incx, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::tbsv, reinterpreting the
// float _Complex arguments as std::complex<float>.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklCtbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx) {
   try {
      using cplx = std::complex<float>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::tbsv(q, convert(upper_lower), convert(trans), convert(unit_diag), n, k,
         reinterpret_cast<cplx *>(a), lda,
         reinterpret_cast<cplx *>(x), incx, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::tbsv, reinterpreting the
// double _Complex arguments as std::complex<double>.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklZtbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, int64_t k, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx) {
   try {
      using cplx = std::complex<double>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::tbsv(q, convert(upper_lower), convert(trans), convert(unit_diag), n, k,
         reinterpret_cast<cplx *>(a), lda,
         reinterpret_cast<cplx *>(x), incx, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::tpmv with float operands.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklStpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float *a, float *x, int64_t incx) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::tpmv(q, convert(upper_lower), convert(trans), convert(unit_diag), n, a, x, incx, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::tpmv with double operands.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklDtpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double *a, double *x, int64_t incx) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::tpmv(q, convert(upper_lower), convert(trans), convert(unit_diag), n, a, x, incx, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::tpmv, reinterpreting the
// float _Complex arguments as std::complex<float>.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklCtpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float _Complex *a, float _Complex *x, int64_t incx) {
   try {
      using cplx = std::complex<float>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::tpmv(q, convert(upper_lower), convert(trans), convert(unit_diag), n,
         reinterpret_cast<cplx *>(a),
         reinterpret_cast<cplx *>(x), incx, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::tpmv, reinterpreting the
// double _Complex arguments as std::complex<double>.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklZtpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double _Complex *a, double _Complex *x, int64_t incx) {
   try {
      using cplx = std::complex<double>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::tpmv(q, convert(upper_lower), convert(trans), convert(unit_diag), n,
         reinterpret_cast<cplx *>(a),
         reinterpret_cast<cplx *>(x), incx, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::tpsv with float operands.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklStpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float *a, float *x, int64_t incx) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::tpsv(q, convert(upper_lower), convert(trans), convert(unit_diag), n, a, x, incx, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::tpsv with double operands.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklDtpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double *a, double *x, int64_t incx) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::tpsv(q, convert(upper_lower), convert(trans), convert(unit_diag), n, a, x, incx, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::tpsv, reinterpreting the
// float _Complex arguments as std::complex<float>.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklCtpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float _Complex *a, float _Complex *x, int64_t incx) {
   try {
      using cplx = std::complex<float>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::tpsv(q, convert(upper_lower), convert(trans), convert(unit_diag), n,
         reinterpret_cast<cplx *>(a),
         reinterpret_cast<cplx *>(x), incx, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::tpsv, reinterpreting the
// double _Complex arguments as std::complex<double>.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklZtpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double _Complex *a, double _Complex *x, int64_t incx) {
   try {
      using cplx = std::complex<double>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::tpsv(q, convert(upper_lower), convert(trans), convert(unit_diag), n,
         reinterpret_cast<cplx *>(a),
         reinterpret_cast<cplx *>(x), incx, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::trmv with float operands.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklStrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float *a, int64_t lda, float *x, int64_t incx) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::trmv(q, convert(upper_lower), convert(trans), convert(unit_diag), n, a, lda, x, incx, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::trmv with double operands.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklDtrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double *a, int64_t lda, double *x, int64_t incx) {
   try {
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::trmv(q, convert(upper_lower), convert(trans), convert(unit_diag), n, a, lda, x, incx, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::trmv, reinterpreting the
// float _Complex arguments as std::complex<float>.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklCtrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx) {
   try {
      using cplx = std::complex<float>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::trmv(q, convert(upper_lower), convert(trans), convert(unit_diag), n,
         reinterpret_cast<cplx *>(a), lda,
         reinterpret_cast<cplx *>(x), incx, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C ABI wrapper: forwards to oneapi::mkl::blas::column_major::trmv, reinterpreting the
// double _Complex arguments as std::complex<double>.
// Calls wait_and_throw() on the queue, then returns 0; returns -1 if anything throws.
extern "C" int onemklZtrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx) {
   try {
      using cplx = std::complex<double>;
      auto &q = device_queue->val;
      oneapi::mkl::blas::column_major::trmv(q, convert(upper_lower), convert(trans), convert(unit_diag), n,
         reinterpret_cast<cplx *>(a), lda,
         reinterpret_cast<cplx *>(x), incx, {});
      q.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: no C++ exception may unwind across this C ABI.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::trsv on float data.
// The uplo/transpose/diag enums are translated with convert(), the call is
// issued on device_queue->val with `{}` as the trailing argument, and the
// queue is then drained with wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklStrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float *a, int64_t lda, float *x, int64_t incx) {
   try {
      oneapi::mkl::blas::column_major::trsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, a, lda, x, incx, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::trsv on double data.
// The uplo/transpose/diag enums are translated with convert(), the call is
// issued on device_queue->val with `{}` as the trailing argument, and the
// queue is then drained with wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklDtrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double *a, int64_t lda, double *x, int64_t incx) {
   try {
      oneapi::mkl::blas::column_major::trsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, a, lda, x, incx, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::trsv on
// single-precision complex data. The `float _Complex` pointers are
// reinterpret_cast to std::complex<float>* before being handed to oneMKL; the
// enums are translated with convert(). After the call the queue is drained
// with wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklCtrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, float _Complex *a, int64_t lda, float _Complex *x, int64_t incx) {
   try {
      oneapi::mkl::blas::column_major::trsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, reinterpret_cast<std::complex<float>*>(a), lda, reinterpret_cast<std::complex<float>*>(x), incx, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::trsv on
// double-precision complex data. The `double _Complex` pointers are
// reinterpret_cast to std::complex<double>* before being handed to oneMKL; the
// enums are translated with convert(). After the call the queue is drained
// with wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklZtrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemklTranspose trans, onemklDiag unit_diag, int64_t n, double _Complex *a, int64_t lda, double _Complex *x, int64_t incx) {
   try {
      oneapi::mkl::blas::column_major::trsv(device_queue->val, convert(upper_lower), convert(trans), convert(unit_diag), n, reinterpret_cast<std::complex<double>*>(a), lda, reinterpret_cast<std::complex<double>*>(x), incx, {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::dotc on
// single-precision complex data. x, y and result are reinterpret_cast from
// `float _Complex*` to std::complex<float>* before the call; oneMKL writes its
// output through `result`. After the call the queue is drained with
// wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklCdotc(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *result) {
   try {
      oneapi::mkl::blas::column_major::dotc(device_queue->val, n, reinterpret_cast<std::complex<float>*>(x), incx, reinterpret_cast<std::complex<float>*>(y), incy, reinterpret_cast<std::complex<float>*>(result), {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::dotc on
// double-precision complex data. x, y and result are reinterpret_cast from
// `double _Complex*` to std::complex<double>* before the call; oneMKL writes
// its output through `result`. After the call the queue is drained with
// wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklZdotc(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *result) {
   try {
      oneapi::mkl::blas::column_major::dotc(device_queue->val, n, reinterpret_cast<std::complex<double>*>(x), incx, reinterpret_cast<std::complex<double>*>(y), incy, reinterpret_cast<std::complex<double>*>(result), {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::dotu on
// single-precision complex data. x, y and result are reinterpret_cast from
// `float _Complex*` to std::complex<float>* before the call; oneMKL writes its
// output through `result`. After the call the queue is drained with
// wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklCdotu(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, float _Complex *y, int64_t incy, float _Complex *result) {
   try {
      oneapi::mkl::blas::column_major::dotu(device_queue->val, n, reinterpret_cast<std::complex<float>*>(x), incx, reinterpret_cast<std::complex<float>*>(y), incy, reinterpret_cast<std::complex<float>*>(result), {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::dotu on
// double-precision complex data. x, y and result are reinterpret_cast from
// `double _Complex*` to std::complex<double>* before the call; oneMKL writes
// its output through `result`. After the call the queue is drained with
// wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklZdotu(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, double _Complex *y, int64_t incy, double _Complex *result) {
   try {
      oneapi::mkl::blas::column_major::dotu(device_queue->val, n, reinterpret_cast<std::complex<double>*>(x), incx, reinterpret_cast<std::complex<double>*>(y), incy, reinterpret_cast<std::complex<double>*>(result), {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::iamax on float data,
// with the output written through an int32_t pointer. `base` is translated
// with convert() and passed along as the index base. After the call the queue
// is drained with wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklSiamax(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, int32_t *result, onemklIndex base) {
   try {
      oneapi::mkl::blas::column_major::iamax(device_queue->val, n, x, incx, result, convert(base), {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::iamax on float data,
// with the output written through an int64_t pointer. `base` is translated
// with convert() and passed along as the index base. After the call the queue
// is drained with wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklSiamax_64(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, int64_t *result, onemklIndex base) {
   try {
      oneapi::mkl::blas::column_major::iamax(device_queue->val, n, x, incx, result, convert(base), {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::iamax on double
// data, with the output written through an int32_t pointer. `base` is
// translated with convert() and passed along as the index base. After the call
// the queue is drained with wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklDiamax(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, int32_t *result, onemklIndex base) {
   try {
      oneapi::mkl::blas::column_major::iamax(device_queue->val, n, x, incx, result, convert(base), {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::iamax on double
// data, with the output written through an int64_t pointer. `base` is
// translated with convert() and passed along as the index base. After the call
// the queue is drained with wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklDiamax_64(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, int64_t *result, onemklIndex base) {
   try {
      oneapi::mkl::blas::column_major::iamax(device_queue->val, n, x, incx, result, convert(base), {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::iamax on
// single-precision complex data, with the output written through an int32_t
// pointer. x is reinterpret_cast from `float _Complex*` to
// std::complex<float>*; `base` is translated with convert(). After the call
// the queue is drained with wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklCiamax(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, int32_t *result, onemklIndex base) {
   try {
      oneapi::mkl::blas::column_major::iamax(device_queue->val, n, reinterpret_cast<std::complex<float>*>(x), incx, result, convert(base), {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::iamax on
// single-precision complex data, with the output written through an int64_t
// pointer. x is reinterpret_cast from `float _Complex*` to
// std::complex<float>*; `base` is translated with convert(). After the call
// the queue is drained with wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklCiamax_64(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, int64_t *result, onemklIndex base) {
   try {
      oneapi::mkl::blas::column_major::iamax(device_queue->val, n, reinterpret_cast<std::complex<float>*>(x), incx, result, convert(base), {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::iamax on
// double-precision complex data, with the output written through an int32_t
// pointer. x is reinterpret_cast from `double _Complex*` to
// std::complex<double>*; `base` is translated with convert(). After the call
// the queue is drained with wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklZiamax(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, int32_t *result, onemklIndex base) {
   try {
      oneapi::mkl::blas::column_major::iamax(device_queue->val, n, reinterpret_cast<std::complex<double>*>(x), incx, result, convert(base), {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::iamax on
// double-precision complex data, with the output written through an int64_t
// pointer. x is reinterpret_cast from `double _Complex*` to
// std::complex<double>*; `base` is translated with convert(). After the call
// the queue is drained with wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklZiamax_64(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, int64_t *result, onemklIndex base) {
   try {
      oneapi::mkl::blas::column_major::iamax(device_queue->val, n, reinterpret_cast<std::complex<double>*>(x), incx, result, convert(base), {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::iamin on float data,
// with the output written through an int32_t pointer. `base` is translated
// with convert() and passed along as the index base. After the call the queue
// is drained with wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklSiamin(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, int32_t *result, onemklIndex base) {
   try {
      oneapi::mkl::blas::column_major::iamin(device_queue->val, n, x, incx, result, convert(base), {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::iamin on float data,
// with the output written through an int64_t pointer. `base` is translated
// with convert() and passed along as the index base. After the call the queue
// is drained with wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklSiamin_64(syclQueue_t device_queue, int64_t n, float *x, int64_t incx, int64_t *result, onemklIndex base) {
   try {
      oneapi::mkl::blas::column_major::iamin(device_queue->val, n, x, incx, result, convert(base), {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::iamin on double
// data, with the output written through an int32_t pointer. `base` is
// translated with convert() and passed along as the index base. After the call
// the queue is drained with wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklDiamin(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, int32_t *result, onemklIndex base) {
   try {
      oneapi::mkl::blas::column_major::iamin(device_queue->val, n, x, incx, result, convert(base), {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::iamin on double
// data, with the output written through an int64_t pointer. `base` is
// translated with convert() and passed along as the index base. After the call
// the queue is drained with wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklDiamin_64(syclQueue_t device_queue, int64_t n, double *x, int64_t incx, int64_t *result, onemklIndex base) {
   try {
      oneapi::mkl::blas::column_major::iamin(device_queue->val, n, x, incx, result, convert(base), {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::iamin on
// single-precision complex data, with the output written through an int32_t
// pointer. x is reinterpret_cast from `float _Complex*` to
// std::complex<float>*; `base` is translated with convert(). After the call
// the queue is drained with wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklCiamin(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, int32_t *result, onemklIndex base) {
   try {
      oneapi::mkl::blas::column_major::iamin(device_queue->val, n, reinterpret_cast<std::complex<float>*>(x), incx, result, convert(base), {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::iamin on
// single-precision complex data, with the output written through an int64_t
// pointer. x is reinterpret_cast from `float _Complex*` to
// std::complex<float>*; `base` is translated with convert(). After the call
// the queue is drained with wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklCiamin_64(syclQueue_t device_queue, int64_t n, float _Complex *x, int64_t incx, int64_t *result, onemklIndex base) {
   try {
      oneapi::mkl::blas::column_major::iamin(device_queue->val, n, reinterpret_cast<std::complex<float>*>(x), incx, result, convert(base), {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

// C-callable wrapper for oneapi::mkl::blas::column_major::iamin on
// double-precision complex data, with the output written through an int32_t
// pointer. x is reinterpret_cast from `double _Complex*` to
// std::complex<double>*; `base` is translated with convert(). After the call
// the queue is drained with wait_and_throw().
// Returns 0 when nothing throws, -1 when any exception is caught.
extern "C" int onemklZiamin(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, int32_t *result, onemklIndex base) {
   try {
      oneapi::mkl::blas::column_major::iamin(device_queue->val, n, reinterpret_cast<std::complex<double>*>(x), incx, result, convert(base), {});
      device_queue->val.wait_and_throw();
   } catch (...) {
      // Catch everything, not only sycl::exception: an exception of any other
      // type (std::bad_alloc, for example) must not unwind out of an
      // extern "C" function into its caller.
      return -1;
   }
   return 0;
}

extern "C" int onemklZiamin_64(syclQueue_t device_queue, int64_t n, double _Complex *x, int64_t incx, int64_t *result, onemklIndex base) {
   try {
      auto status = oneapi::mkl::blas::column_major::iamin(device_queue->val, n, reinterpret_cast<std::complex<double>*
Download .txt
gitextract_pys1ksq7/

├── .buildkite/
│   └── pipeline.yml
├── .github/
│   ├── dependabot.yml
│   └── workflows/
│       ├── CompatHelper.yml
│       ├── DocsCleanup.yml
│       ├── Format.yml
│       ├── TagBot.yml
│       ├── ci.yml
│       └── docs.yml
├── .gitignore
├── CITATION.cff
├── LICENSE.md
├── Project.toml
├── README.md
├── codecov.yml
├── deps/
│   ├── .clang-format
│   ├── .gitignore
│   ├── CMakeLists.txt
│   ├── Project.toml
│   ├── build_ci.jl
│   ├── build_local.jl
│   ├── generate_helpers.jl
│   ├── generate_interfaces.jl
│   ├── onemkl_epilogue.cpp
│   ├── onemkl_epilogue.h
│   ├── onemkl_prologue.cpp
│   ├── onemkl_prologue.h
│   └── src/
│       ├── onemkl.cpp
│       ├── onemkl.h
│       ├── onemkl_dft.cpp
│       ├── onemkl_dft.h
│       ├── sycl.cpp
│       ├── sycl.h
│       └── sycl.hpp
├── docs/
│   ├── Project.toml
│   ├── make.jl
│   └── src/
│       ├── api/
│       │   ├── arrays.md
│       │   ├── compiler.md
│       │   ├── context.md
│       │   ├── kernels.md
│       │   └── memory.md
│       ├── api.md
│       ├── arrays.md
│       ├── device.md
│       ├── getting_started.md
│       ├── index.md
│       ├── installation.md
│       ├── kernels.md
│       ├── level_zero.md
│       ├── memory.md
│       ├── onemkl.md
│       ├── troubleshooting.md
│       └── usage/
│           └── performance.md
├── examples/
│   ├── gemm.jl
│   └── vadd.jl
├── lib/
│   ├── level-zero/
│   │   ├── barrier.jl
│   │   ├── cmdlist.jl
│   │   ├── cmdqueue.jl
│   │   ├── common.jl
│   │   ├── context.jl
│   │   ├── copy.jl
│   │   ├── device.jl
│   │   ├── driver.jl
│   │   ├── error.jl
│   │   ├── event.jl
│   │   ├── fence.jl
│   │   ├── libze.jl
│   │   ├── libze_aliases.jl
│   │   ├── memory.jl
│   │   ├── module.jl
│   │   ├── oneL0.jl
│   │   ├── pointer.jl
│   │   ├── residency.jl
│   │   └── utils.jl
│   ├── mkl/
│   │   ├── array.jl
│   │   ├── fft.jl
│   │   ├── interfaces.jl
│   │   ├── linalg.jl
│   │   ├── oneMKL.jl
│   │   ├── utils.jl
│   │   ├── wrappers_blas.jl
│   │   ├── wrappers_lapack.jl
│   │   └── wrappers_sparse.jl
│   ├── support/
│   │   ├── Support.jl
│   │   └── liboneapi_support.jl
│   ├── sycl/
│   │   └── SYCL.jl
│   └── utils/
│       ├── APIUtils.jl
│       └── enum.jl
├── res/
│   ├── Project.toml
│   ├── libze_prologue.jl
│   ├── local.jl
│   ├── support.toml
│   ├── wrap.jl
│   └── ze.toml
├── src/
│   ├── accumulate.jl
│   ├── array.jl
│   ├── broadcast.jl
│   ├── compiler/
│   │   ├── compilation.jl
│   │   ├── execution.jl
│   │   └── reflection.jl
│   ├── context.jl
│   ├── device/
│   │   ├── array.jl
│   │   ├── atomics.jl
│   │   ├── quirks.jl
│   │   └── runtime.jl
│   ├── gpuarrays.jl
│   ├── indexing.jl
│   ├── mapreduce.jl
│   ├── memory.jl
│   ├── oneAPI.jl
│   ├── oneAPIKernels.jl
│   ├── pool.jl
│   ├── random.jl
│   ├── sorting.jl
│   └── utils.jl
└── test/
    ├── Project.toml
    ├── array.jl
    ├── device/
    │   └── intrinsics.jl
    ├── dummy.bc
    ├── dummy.ll
    ├── dummy.spt
    ├── dummy.spv
    ├── examples.jl
    ├── execution.jl
    ├── fft.jl
    ├── indexing.jl
    ├── kernelabstractions.jl
    ├── level-zero.jl
    ├── onemkl.jl
    ├── pointer.jl
    ├── random.jl
    ├── runtests.jl
    ├── setup.jl
    ├── sorting.jl
    └── sycl.jl
Download .txt
SYMBOL INDEX (919 symbols across 10 files)

FILE: deps/onemkl_epilogue.cpp
  function onemklXsparse_matmat (line 1) | int onemklXsparse_matmat(syclQueue_t device_queue, matrix_handle_t A, ma...
  function onemklDestroy (line 13) | int onemklDestroy() {

FILE: deps/onemkl_prologue.cpp
  function convert (line 8) | oneapi::mkl::transpose convert(onemklTranspose val) {
  function convert (line 37) | oneapi::mkl::uplo convert(onemklUplo val) {
  function convert (line 61) | oneapi::mkl::diag convert(onemklDiag val) {
  function convert (line 85) | oneapi::mkl::side convert(onemklSide val) {
  function convert (line 109) | oneapi::mkl::offset convert(onemklOffset val) {
  function convert (line 120) | oneapi::mkl::job convert(onemklJob val) {
  function convert (line 137) | oneapi::mkl::generate convert(onemklGenerate val) {
  function convert (line 150) | oneapi::mkl::compz convert(onemklCompz val) {
  function convert (line 161) | oneapi::mkl::direct convert(onemklDirect val) {
  function convert (line 170) | oneapi::mkl::storev convert(onemklStorev val) {
  function convert (line 179) | oneapi::mkl::rangev convert(onemklRangev val) {
  function convert (line 190) | oneapi::mkl::order convert(onemklOrder val) {
  function convert (line 199) | oneapi::mkl::jobsvd convert(onemklJobsvd val) {
  function convert (line 212) | oneapi::mkl::layout convert(onemklLayout val) {
  function convert (line 221) | oneapi::mkl::index_base convert(onemklIndex val) {
  function convert (line 230) | oneapi::mkl::sparse::property convert(onemklProperty val) {
  function convert (line 239) | oneapi::mkl::sparse::matrix_view_descr convert(onemklMatrixView val) {
  function convert (line 246) | oneapi::mkl::sparse::matmat_request convert(onemklMatmatRequest val) {
  function convert (line 269) | oneapi::mkl::sparse::omatconvert_alg convert(onemklOmatconvertAlg val) {
  function convert (line 276) | oneapi::mkl::sparse::omatadd_alg convert(onemklOmataddAlg val) {
  function onemkl_version (line 284) | void onemkl_version(int64_t *major, int64_t *minor, int64_t *patch) {
  class gemmBatchInfo (line 293) | class gemmBatchInfo {
    method gemmBatchInfo (line 302) | gemmBatchInfo(syclQueue_t device_queue,
  class trsmBatchInfo (line 337) | class trsmBatchInfo {
    method trsmBatchInfo (line 351) | trsmBatchInfo(syclQueue_t device_queue,
  function onemklHgemm_batch (line 397) | int onemklHgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
  function onemklSgemm_batch (line 416) | int onemklSgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
  function onemklDgemm_batch (line 435) | int onemklDgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
  function onemklCgemm_batch (line 454) | int onemklCgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
  function onemklZgemm_batch (line 477) | int onemklZgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
  function onemklStrsm_batch (line 501) | int onemklStrsm_batch(syclQueue_t device_queue, onemklSide left_right,
  function onemklDtrsm_batch (line 519) | int onemklDtrsm_batch(syclQueue_t device_queue, onemklSide left_right,
  function onemklCtrsm_batch (line 538) | int onemklCtrsm_batch(syclQueue_t device_queue, onemklSide left_right,
  function onemklZtrsm_batch (line 559) | int onemklZtrsm_batch(syclQueue_t device_queue, onemklSide left_right,

FILE: deps/onemkl_prologue.h
  type onemklTranspose (line 13) | typedef enum {
  type onemklUplo (line 19) | typedef enum {
  type onemklDiag (line 24) | typedef enum {
  type onemklSide (line 29) | typedef enum {
  type onemklOffset (line 34) | typedef enum {
  type onemklJob (line 41) | typedef enum {
  type onemklGenerate (line 50) | typedef enum {
  type onemklCompz (line 57) | typedef enum {
  type onemklDirect (line 63) | typedef enum {
  type onemklStorev (line 68) | typedef enum {
  type onemklRangev (line 73) | typedef enum {
  type onemklOrder (line 79) | typedef enum {
  type onemklJobsvd (line 84) | typedef enum {
  type onemklLayout (line 91) | typedef enum {
  type onemklIndex (line 96) | typedef enum {
  type onemklProperty (line 102) | typedef enum {
  type onemklMatrixView (line 107) | typedef enum {
  type onemklMatmatRequest (line 111) | typedef enum {
  type onemklOmatconvertAlg (line 123) | typedef enum {
  type onemklOmataddAlg (line 127) | typedef enum {
  type matrix_handle (line 131) | struct matrix_handle
  type matrix_handle (line 132) | struct matrix_handle
  type matmat_descr (line 134) | struct matmat_descr
  type matmat_descr (line 135) | struct matmat_descr
  type omatconvert_descr (line 137) | struct omatconvert_descr
  type omatconvert_descr (line 138) | struct omatconvert_descr
  type omatadd_descr (line 140) | struct omatadd_descr
  type omatadd_descr (line 141) | struct omatadd_descr

FILE: deps/src/onemkl.cpp
  function convert (line 8) | oneapi::mkl::transpose convert(onemklTranspose val) {
  function convert (line 37) | oneapi::mkl::uplo convert(onemklUplo val) {
  function convert (line 61) | oneapi::mkl::diag convert(onemklDiag val) {
  function convert (line 85) | oneapi::mkl::side convert(onemklSide val) {
  function convert (line 109) | oneapi::mkl::offset convert(onemklOffset val) {
  function convert (line 120) | oneapi::mkl::job convert(onemklJob val) {
  function convert (line 137) | oneapi::mkl::generate convert(onemklGenerate val) {
  function convert (line 150) | oneapi::mkl::compz convert(onemklCompz val) {
  function convert (line 161) | oneapi::mkl::direct convert(onemklDirect val) {
  function convert (line 170) | oneapi::mkl::storev convert(onemklStorev val) {
  function convert (line 179) | oneapi::mkl::rangev convert(onemklRangev val) {
  function convert (line 190) | oneapi::mkl::order convert(onemklOrder val) {
  function convert (line 199) | oneapi::mkl::jobsvd convert(onemklJobsvd val) {
  function convert (line 212) | oneapi::mkl::layout convert(onemklLayout val) {
  function convert (line 221) | oneapi::mkl::index_base convert(onemklIndex val) {
  function convert (line 230) | oneapi::mkl::sparse::property convert(onemklProperty val) {
  function convert (line 239) | oneapi::mkl::sparse::matrix_view_descr convert(onemklMatrixView val) {
  function convert (line 246) | oneapi::mkl::sparse::matmat_request convert(onemklMatmatRequest val) {
  function convert (line 269) | oneapi::mkl::sparse::omatconvert_alg convert(onemklOmatconvertAlg val) {
  function convert (line 276) | oneapi::mkl::sparse::omatadd_alg convert(onemklOmataddAlg val) {
  function onemkl_version (line 284) | void onemkl_version(int64_t *major, int64_t *minor, int64_t *patch) {
  class gemmBatchInfo (line 293) | class gemmBatchInfo {
    method gemmBatchInfo (line 302) | gemmBatchInfo(syclQueue_t device_queue,
  class trsmBatchInfo (line 337) | class trsmBatchInfo {
    method trsmBatchInfo (line 351) | trsmBatchInfo(syclQueue_t device_queue,
  function onemklHgemm_batch (line 397) | int onemklHgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
  function onemklSgemm_batch (line 416) | int onemklSgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
  function onemklDgemm_batch (line 435) | int onemklDgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
  function onemklCgemm_batch (line 454) | int onemklCgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
  function onemklZgemm_batch (line 477) | int onemklZgemm_batch(syclQueue_t device_queue, onemklTranspose transa,
  function onemklStrsm_batch (line 501) | int onemklStrsm_batch(syclQueue_t device_queue, onemklSide left_right,
  function onemklDtrsm_batch (line 519) | int onemklDtrsm_batch(syclQueue_t device_queue, onemklSide left_right,
  function onemklCtrsm_batch (line 538) | int onemklCtrsm_batch(syclQueue_t device_queue, onemklSide left_right,
  function onemklZtrsm_batch (line 559) | int onemklZtrsm_batch(syclQueue_t device_queue, onemklSide left_right,
  function onemklHgemm (line 580) | int onemklHgemm(syclQueue_t device_queue, onemklTranspose transa, onemkl...
  function onemklSgemm (line 588) | int onemklSgemm(syclQueue_t device_queue, onemklTranspose transa, onemkl...
  function onemklDgemm (line 596) | int onemklDgemm(syclQueue_t device_queue, onemklTranspose transa, onemkl...
  function onemklCgemm (line 604) | int onemklCgemm(syclQueue_t device_queue, onemklTranspose transa, onemkl...
  function onemklZgemm (line 612) | int onemklZgemm(syclQueue_t device_queue, onemklTranspose transa, onemkl...
  function onemklSsymm (line 620) | int onemklSsymm(syclQueue_t device_queue, onemklSide left_right, onemklU...
  function onemklDsymm (line 628) | int onemklDsymm(syclQueue_t device_queue, onemklSide left_right, onemklU...
  function onemklCsymm (line 636) | int onemklCsymm(syclQueue_t device_queue, onemklSide left_right, onemklU...
  function onemklZsymm (line 644) | int onemklZsymm(syclQueue_t device_queue, onemklSide left_right, onemklU...
  function onemklChemm (line 652) | int onemklChemm(syclQueue_t device_queue, onemklSide left_right, onemklU...
  function onemklZhemm (line 660) | int onemklZhemm(syclQueue_t device_queue, onemklSide left_right, onemklU...
  function onemklSsyrk (line 668) | int onemklSsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklDsyrk (line 676) | int onemklDsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklCsyrk (line 684) | int onemklCsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklZsyrk (line 692) | int onemklZsyrk(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklCherk (line 700) | int onemklCherk(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklZherk (line 708) | int onemklZherk(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklSsyr2k (line 716) | int onemklSsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemk...
  function onemklDsyr2k (line 724) | int onemklDsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemk...
  function onemklCsyr2k (line 732) | int onemklCsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemk...
  function onemklZsyr2k (line 740) | int onemklZsyr2k(syclQueue_t device_queue, onemklUplo upper_lower, onemk...
  function onemklCher2k (line 748) | int onemklCher2k(syclQueue_t device_queue, onemklUplo upper_lower, onemk...
  function onemklZher2k (line 756) | int onemklZher2k(syclQueue_t device_queue, onemklUplo upper_lower, onemk...
  function onemklStrmm (line 764) | int onemklStrmm(syclQueue_t device_queue, onemklSide left_right, onemklU...
  function onemklDtrmm (line 772) | int onemklDtrmm(syclQueue_t device_queue, onemklSide left_right, onemklU...
  function onemklCtrmm (line 780) | int onemklCtrmm(syclQueue_t device_queue, onemklSide left_right, onemklU...
  function onemklZtrmm (line 788) | int onemklZtrmm(syclQueue_t device_queue, onemklSide left_right, onemklU...
  function onemklStrmm_variant (line 796) | int onemklStrmm_variant(syclQueue_t device_queue, onemklSide left_right,...
  function onemklDtrmm_variant (line 804) | int onemklDtrmm_variant(syclQueue_t device_queue, onemklSide left_right,...
  function onemklCtrmm_variant (line 812) | int onemklCtrmm_variant(syclQueue_t device_queue, onemklSide left_right,...
  function onemklZtrmm_variant (line 820) | int onemklZtrmm_variant(syclQueue_t device_queue, onemklSide left_right,...
  function onemklStrsm (line 828) | int onemklStrsm(syclQueue_t device_queue, onemklSide left_right, onemklU...
  function onemklDtrsm (line 836) | int onemklDtrsm(syclQueue_t device_queue, onemklSide left_right, onemklU...
  function onemklCtrsm (line 844) | int onemklCtrsm(syclQueue_t device_queue, onemklSide left_right, onemklU...
  function onemklZtrsm (line 852) | int onemklZtrsm(syclQueue_t device_queue, onemklSide left_right, onemklU...
  function onemklStrsm_variant (line 860) | int onemklStrsm_variant(syclQueue_t device_queue, onemklSide left_right,...
  function onemklDtrsm_variant (line 868) | int onemklDtrsm_variant(syclQueue_t device_queue, onemklSide left_right,...
  function onemklCtrsm_variant (line 876) | int onemklCtrsm_variant(syclQueue_t device_queue, onemklSide left_right,...
  function onemklZtrsm_variant (line 884) | int onemklZtrsm_variant(syclQueue_t device_queue, onemklSide left_right,...
  function onemklSdgmm (line 892) | int onemklSdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t...
  function onemklDdgmm (line 900) | int onemklDdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t...
  function onemklCdgmm (line 908) | int onemklCdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t...
  function onemklZdgmm (line 916) | int onemklZdgmm(syclQueue_t device_queue, onemklSide left_right, int64_t...
  function onemklSgemv (line 924) | int onemklSgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t...
  function onemklDgemv (line 932) | int onemklDgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t...
  function onemklCgemv (line 940) | int onemklCgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t...
  function onemklZgemv (line 948) | int onemklZgemv(syclQueue_t device_queue, onemklTranspose trans, int64_t...
  function onemklSgbmv (line 956) | int onemklSgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t...
  function onemklDgbmv (line 964) | int onemklDgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t...
  function onemklCgbmv (line 972) | int onemklCgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t...
  function onemklZgbmv (line 980) | int onemklZgbmv(syclQueue_t device_queue, onemklTranspose trans, int64_t...
  function onemklSger (line 988) | int onemklSger(syclQueue_t device_queue, int64_t m, int64_t n, float *al...
  function onemklDger (line 996) | int onemklDger(syclQueue_t device_queue, int64_t m, int64_t n, double *a...
  function onemklCgerc (line 1004) | int onemklCgerc(syclQueue_t device_queue, int64_t m, int64_t n, float _C...
  function onemklZgerc (line 1012) | int onemklZgerc(syclQueue_t device_queue, int64_t m, int64_t n, double _...
  function onemklCgeru (line 1020) | int onemklCgeru(syclQueue_t device_queue, int64_t m, int64_t n, float _C...
  function onemklZgeru (line 1028) | int onemklZgeru(syclQueue_t device_queue, int64_t m, int64_t n, double _...
  function onemklChbmv (line 1036) | int onemklChbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklZhbmv (line 1044) | int onemklZhbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklChemv (line 1052) | int onemklChemv(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklZhemv (line 1060) | int onemklZhemv(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklCher (line 1068) | int onemklCher(syclQueue_t device_queue, onemklUplo upper_lower, int64_t...
  function onemklZher (line 1076) | int onemklZher(syclQueue_t device_queue, onemklUplo upper_lower, int64_t...
  function onemklCher2 (line 1084) | int onemklCher2(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklZher2 (line 1092) | int onemklZher2(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklChpmv (line 1100) | int onemklChpmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklZhpmv (line 1108) | int onemklZhpmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklChpr (line 1116) | int onemklChpr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t...
  function onemklZhpr (line 1124) | int onemklZhpr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t...
  function onemklChpr2 (line 1132) | int onemklChpr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklZhpr2 (line 1140) | int onemklZhpr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklSsbmv (line 1148) | int onemklSsbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklDsbmv (line 1156) | int onemklDsbmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklSsymv (line 1164) | int onemklSsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklDsymv (line 1172) | int onemklDsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklCsymv (line 1180) | int onemklCsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklZsymv (line 1188) | int onemklZsymv(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklSsyr (line 1196) | int onemklSsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t...
  function onemklDsyr (line 1204) | int onemklDsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t...
  function onemklCsyr (line 1212) | int onemklCsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t...
  function onemklZsyr (line 1220) | int onemklZsyr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t...
  function onemklSsyr2 (line 1228) | int onemklSsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklDsyr2 (line 1236) | int onemklDsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklCsyr2 (line 1244) | int onemklCsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklZsyr2 (line 1252) | int onemklZsyr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklSspmv (line 1260) | int onemklSspmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklDspmv (line 1268) | int onemklDspmv(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklSspr (line 1276) | int onemklSspr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t...
  function onemklDspr (line 1284) | int onemklDspr(syclQueue_t device_queue, onemklUplo upper_lower, int64_t...
  function onemklSspr2 (line 1292) | int onemklSspr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklDspr2 (line 1300) | int onemklDspr2(syclQueue_t device_queue, onemklUplo upper_lower, int64_...
  function onemklStbmv (line 1308) | int onemklStbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklDtbmv (line 1316) | int onemklDtbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklCtbmv (line 1324) | int onemklCtbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklZtbmv (line 1332) | int onemklZtbmv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklStbsv (line 1340) | int onemklStbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklDtbsv (line 1348) | int onemklDtbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklCtbsv (line 1356) | int onemklCtbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklZtbsv (line 1364) | int onemklZtbsv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklStpmv (line 1372) | int onemklStpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklDtpmv (line 1380) | int onemklDtpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklCtpmv (line 1388) | int onemklCtpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklZtpmv (line 1396) | int onemklZtpmv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklStpsv (line 1404) | int onemklStpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklDtpsv (line 1412) | int onemklDtpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklCtpsv (line 1420) | int onemklCtpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklZtpsv (line 1428) | int onemklZtpsv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklStrmv (line 1436) | int onemklStrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklDtrmv (line 1444) | int onemklDtrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklCtrmv (line 1452) | int onemklCtrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklZtrmv (line 1460) | int onemklZtrmv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklStrsv (line 1468) | int onemklStrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklDtrsv (line 1476) | int onemklDtrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklCtrsv (line 1484) | int onemklCtrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklZtrsv (line 1492) | int onemklZtrsv(syclQueue_t device_queue, onemklUplo upper_lower, onemkl...
  function onemklCdotc (line 1500) | int onemklCdotc(syclQueue_t device_queue, int64_t n, float _Complex *x, ...
  function onemklZdotc (line 1508) | int onemklZdotc(syclQueue_t device_queue, int64_t n, double _Complex *x,...
  function onemklCdotu (line 1516) | int onemklCdotu(syclQueue_t device_queue, int64_t n, float _Complex *x, ...
  function onemklZdotu (line 1524) | int onemklZdotu(syclQueue_t device_queue, int64_t n, double _Complex *x,...
  function onemklSiamax (line 1532) | int onemklSiamax(syclQueue_t device_queue, int64_t n, float *x, int64_t ...
  function onemklSiamax_64 (line 1540) | int onemklSiamax_64(syclQueue_t device_queue, int64_t n, float *x, int64...
  function onemklDiamax (line 1548) | int onemklDiamax(syclQueue_t device_queue, int64_t n, double *x, int64_t...
  function onemklDiamax_64 (line 1556) | int onemklDiamax_64(syclQueue_t device_queue, int64_t n, double *x, int6...
  function onemklCiamax (line 1564) | int onemklCiamax(syclQueue_t device_queue, int64_t n, float _Complex *x,...
  function onemklCiamax_64 (line 1572) | int onemklCiamax_64(syclQueue_t device_queue, int64_t n, float _Complex ...
  function onemklZiamax (line 1580) | int onemklZiamax(syclQueue_t device_queue, int64_t n, double _Complex *x...
  function onemklZiamax_64 (line 1588) | int onemklZiamax_64(syclQueue_t device_queue, int64_t n, double _Complex...
  function onemklSiamin (line 1596) | int onemklSiamin(syclQueue_t device_queue, int64_t n, float *x, int64_t ...
  function onemklSiamin_64 (line 1604) | int onemklSiamin_64(syclQueue_t device_queue, int64_t n, float *x, int64...
  function onemklDiamin (line 1612) | int onemklDiamin(syclQueue_t device_queue, int64_t n, double *x, int64_t...
  function onemklDiamin_64 (line 1620) | int onemklDiamin_64(syclQueue_t device_queue, int64_t n, double *x, int6...
  function onemklCiamin (line 1628) | int onemklCiamin(syclQueue_t device_queue, int64_t n, float _Complex *x,...
  function onemklCiamin_64 (line 1636) | int onemklCiamin_64(syclQueue_t device_queue, int64_t n, float _Complex ...
  function onemklZiamin (line 1644) | int onemklZiamin(syclQueue_t device_queue, int64_t n, double _Complex *x...
  function onemklZiamin_64 (line 1652) | int onemklZiamin_64(syclQueue_t device_queue, int64_t n, double _Complex...
  function onemklSasum (line 1660) | int onemklSasum(syclQueue_t device_queue, int64_t n, float *x, int64_t i...
  function onemklDasum (line 1668) | int onemklDasum(syclQueue_t device_queue, int64_t n, double *x, int64_t ...
  function onemklCasum (line 1676) | int onemklCasum(syclQueue_t device_queue, int64_t n, float _Complex *x, ...
  function onemklZasum (line 1684) | int onemklZasum(syclQueue_t device_queue, int64_t n, double _Complex *x,...
  function onemklHaxpy (line 1692) | int onemklHaxpy(syclQueue_t device_queue, int64_t n, short *alpha, short...
  function onemklSaxpy (line 1700) | int onemklSaxpy(syclQueue_t device_queue, int64_t n, float *alpha, float...
  function onemklDaxpy (line 1708) | int onemklDaxpy(syclQueue_t device_queue, int64_t n, double *alpha, doub...
  function onemklCaxpy (line 1716) | int onemklCaxpy(syclQueue_t device_queue, int64_t n, float _Complex *alp...
  function onemklZaxpy (line 1724) | int onemklZaxpy(syclQueue_t device_queue, int64_t n, double _Complex *al...
  function onemklSaxpby (line 1732) | int onemklSaxpby(syclQueue_t device_queue, int64_t n, float *alpha, floa...
  function onemklDaxpby (line 1740) | int onemklDaxpby(syclQueue_t device_queue, int64_t n, double *alpha, dou...
  function onemklCaxpby (line 1748) | int onemklCaxpby(syclQueue_t device_queue, int64_t n, float _Complex *al...
  function onemklZaxpby (line 1756) | int onemklZaxpby(syclQueue_t device_queue, int64_t n, double _Complex *a...
  function onemklScopy (line 1764) | int onemklScopy(syclQueue_t device_queue, int64_t n, float *x, int64_t i...
  function onemklDcopy (line 1772) | int onemklDcopy(syclQueue_t device_queue, int64_t n, double *x, int64_t ...
  function onemklCcopy (line 1780) | int onemklCcopy(syclQueue_t device_queue, int64_t n, float _Complex *x, ...
  function onemklZcopy (line 1788) | int onemklZcopy(syclQueue_t device_queue, int64_t n, double _Complex *x,...
  function onemklHdot (line 1796) | int onemklHdot(syclQueue_t device_queue, int64_t n, short *x, int64_t in...
  function onemklSdot (line 1804) | int onemklSdot(syclQueue_t device_queue, int64_t n, float *x, int64_t in...
  function onemklDdot (line 1812) | int onemklDdot(syclQueue_t device_queue, int64_t n, double *x, int64_t i...
  function onemklSsdsdot (line 1820) | int onemklSsdsdot(syclQueue_t device_queue, int64_t n, float *sb, float ...
  function onemklHnrm2 (line 1828) | int onemklHnrm2(syclQueue_t device_queue, int64_t n, short *x, int64_t i...
  function onemklSnrm2 (line 1836) | int onemklSnrm2(syclQueue_t device_queue, int64_t n, float *x, int64_t i...
  function onemklDnrm2 (line 1844) | int onemklDnrm2(syclQueue_t device_queue, int64_t n, double *x, int64_t ...
  function onemklCnrm2 (line 1852) | int onemklCnrm2(syclQueue_t device_queue, int64_t n, float _Complex *x, ...
  function onemklZnrm2 (line 1860) | int onemklZnrm2(syclQueue_t device_queue, int64_t n, double _Complex *x,...
  function onemklHrot (line 1868) | int onemklHrot(syclQueue_t device_queue, int64_t n, short *x, int64_t in...
  function onemklSrot (line 1876) | int onemklSrot(syclQueue_t device_queue, int64_t n, float *x, int64_t in...
  function onemklDrot (line 1884) | int onemklDrot(syclQueue_t device_queue, int64_t n, double *x, int64_t i...
  function onemklCSrot (line 1892) | int onemklCSrot(syclQueue_t device_queue, int64_t n, float _Complex *x, ...
  function onemklCrot (line 1900) | int onemklCrot(syclQueue_t device_queue, int64_t n, float _Complex *x, i...
  function onemklZDrot (line 1908) | int onemklZDrot(syclQueue_t device_queue, int64_t n, double _Complex *x,...
  function onemklZrot (line 1916) | int onemklZrot(syclQueue_t device_queue, int64_t n, double _Complex *x, ...
  function onemklSrotg (line 1924) | int onemklSrotg(syclQueue_t device_queue, float *a, float *b, float *c, ...
  function onemklDrotg (line 1932) | int onemklDrotg(syclQueue_t device_queue, double *a, double *b, double *...
  function onemklCrotg (line 1940) | int onemklCrotg(syclQueue_t device_queue, float _Complex *a, float _Comp...
  function onemklZrotg (line 1948) | int onemklZrotg(syclQueue_t device_queue, double _Complex *a, double _Co...
  function onemklSrotm (line 1956) | int onemklSrotm(syclQueue_t device_queue, int64_t n, float *x, int64_t i...
  function onemklDrotm (line 1964) | int onemklDrotm(syclQueue_t device_queue, int64_t n, double *x, int64_t ...
  function onemklSrotmg (line 1972) | int onemklSrotmg(syclQueue_t device_queue, float *d1, float *d2, float *...
  function onemklDrotmg (line 1980) | int onemklDrotmg(syclQueue_t device_queue, double *d1, double *d2, doubl...
  function onemklHscal (line 1988) | int onemklHscal(syclQueue_t device_queue, int64_t n, short *alpha, short...
  function onemklSscal (line 1996) | int onemklSscal(syclQueue_t device_queue, int64_t n, float *alpha, float...
  function onemklDscal (line 2004) | int onemklDscal(syclQueue_t device_queue, int64_t n, double *alpha, doub...
  function onemklCSscal (line 2012) | int onemklCSscal(syclQueue_t device_queue, int64_t n, float *alpha, floa...
  function onemklZDscal (line 2020) | int onemklZDscal(syclQueue_t device_queue, int64_t n, double *alpha, dou...
  function onemklCscal (line 2028) | int onemklCscal(syclQueue_t device_queue, int64_t n, float _Complex *alp...
  function onemklZscal (line 2036) | int onemklZscal(syclQueue_t device_queue, int64_t n, double _Complex *al...
  function onemklSswap (line 2044) | int onemklSswap(syclQueue_t device_queue, int64_t n, float *x, int64_t i...
  function onemklDswap (line 2052) | int onemklDswap(syclQueue_t device_queue, int64_t n, double *x, int64_t ...
  function onemklCswap (line 2060) | int onemklCswap(syclQueue_t device_queue, int64_t n, float _Complex *x, ...
  function onemklZswap (line 2068) | int onemklZswap(syclQueue_t device_queue, int64_t n, double _Complex *x,...
  function onemklHgemm_batch_strided (line 2076) | int onemklHgemm_batch_strided(syclQueue_t device_queue, onemklTranspose ...
  function onemklSgemm_batch_strided (line 2084) | int onemklSgemm_batch_strided(syclQueue_t device_queue, onemklTranspose ...
  function onemklDgemm_batch_strided (line 2092) | int onemklDgemm_batch_strided(syclQueue_t device_queue, onemklTranspose ...
  function onemklCgemm_batch_strided (line 2100) | int onemklCgemm_batch_strided(syclQueue_t device_queue, onemklTranspose ...
  function onemklZgemm_batch_strided (line 2108) | int onemklZgemm_batch_strided(syclQueue_t device_queue, onemklTranspose ...
  function onemklSsyrk_batch_strided (line 2116) | int onemklSsyrk_batch_strided(syclQueue_t device_queue, onemklUplo upper...
  function onemklDsyrk_batch_strided (line 2124) | int onemklDsyrk_batch_strided(syclQueue_t device_queue, onemklUplo upper...
  function onemklCsyrk_batch_strided (line 2132) | int onemklCsyrk_batch_strided(syclQueue_t device_queue, onemklUplo upper...
  function onemklZsyrk_batch_strided (line 2140) | int onemklZsyrk_batch_strided(syclQueue_t device_queue, onemklUplo upper...
  function onemklStrsm_batch_strided (line 2148) | int onemklStrsm_batch_strided(syclQueue_t device_queue, onemklSide left_...
  function onemklDtrsm_batch_strided (line 2156) | int onemklDtrsm_batch_strided(syclQueue_t device_queue, onemklSide left_...
  function onemklCtrsm_batch_strided (line 2164) | int onemklCtrsm_batch_strided(syclQueue_t device_queue, onemklSide left_...
  function onemklZtrsm_batch_strided (line 2172) | int onemklZtrsm_batch_strided(syclQueue_t device_queue, onemklSide left_...
  function onemklSgemv_batch_strided (line 2180) | int onemklSgemv_batch_strided(syclQueue_t device_queue, onemklTranspose ...
  function onemklDgemv_batch_strided (line 2188) | int onemklDgemv_batch_strided(syclQueue_t device_queue, onemklTranspose ...
  function onemklCgemv_batch_strided (line 2196) | int onemklCgemv_batch_strided(syclQueue_t device_queue, onemklTranspose ...
  function onemklZgemv_batch_strided (line 2204) | int onemklZgemv_batch_strided(syclQueue_t device_queue, onemklTranspose ...
  function onemklSdgmm_batch_strided (line 2212) | int onemklSdgmm_batch_strided(syclQueue_t device_queue, onemklSide left_...
  function onemklDdgmm_batch_strided (line 2220) | int onemklDdgmm_batch_strided(syclQueue_t device_queue, onemklSide left_...
  function onemklCdgmm_batch_strided (line 2228) | int onemklCdgmm_batch_strided(syclQueue_t device_queue, onemklSide left_...
  function onemklZdgmm_batch_strided (line 2236) | int onemklZdgmm_batch_strided(syclQueue_t device_queue, onemklSide left_...
  function onemklSaxpy_batch_strided (line 2244) | int onemklSaxpy_batch_strided(syclQueue_t device_queue, int64_t n, float...
  function onemklDaxpy_batch_strided (line 2252) | int onemklDaxpy_batch_strided(syclQueue_t device_queue, int64_t n, doubl...
  function onemklCaxpy_batch_strided (line 2260) | int onemklCaxpy_batch_strided(syclQueue_t device_queue, int64_t n, float...
  function onemklZaxpy_batch_strided (line 2268) | int onemklZaxpy_batch_strided(syclQueue_t device_queue, int64_t n, doubl...
  function onemklScopy_batch_strided (line 2276) | int onemklScopy_batch_strided(syclQueue_t device_queue, int64_t n, float...
  function onemklDcopy_batch_strided (line 2284) | int onemklDcopy_batch_strided(syclQueue_t device_queue, int64_t n, doubl...
  function onemklCcopy_batch_strided (line 2292) | int onemklCcopy_batch_strided(syclQueue_t device_queue, int64_t n, float...
  function onemklZcopy_batch_strided (line 2300) | int onemklZcopy_batch_strided(syclQueue_t device_queue, int64_t n, doubl...
  function onemklSgemmt (line 2308) | int onemklSgemmt(syclQueue_t device_queue, onemklUplo upper_lower, onemk...
  function onemklDgemmt (line 2316) | int onemklDgemmt(syclQueue_t device_queue, onemklUplo upper_lower, onemk...
  function onemklCgemmt (line 2324) | int onemklCgemmt(syclQueue_t device_queue, onemklUplo upper_lower, onemk...
  function onemklZgemmt (line 2332) | int onemklZgemmt(syclQueue_t device_queue, onemklUplo upper_lower, onemk...
  function onemklSimatcopy (line 2340) | int onemklSimatcopy(syclQueue_t device_queue, onemklTranspose trans, int...
  function onemklDimatcopy (line 2348) | int onemklDimatcopy(syclQueue_t device_queue, onemklTranspose trans, int...
  function onemklCimatcopy (line 2356) | int onemklCimatcopy(syclQueue_t device_queue, onemklTranspose trans, int...
  function onemklZimatcopy (line 2364) | int onemklZimatcopy(syclQueue_t device_queue, onemklTranspose trans, int...
  function onemklSomatcopy (line 2372) | int onemklSomatcopy(syclQueue_t device_queue, onemklTranspose trans, int...
  function onemklDomatcopy (line 2380) | int onemklDomatcopy(syclQueue_t device_queue, onemklTranspose trans, int...
  function onemklComatcopy (line 2388) | int onemklComatcopy(syclQueue_t device_queue, onemklTranspose trans, int...
  function onemklZomatcopy (line 2396) | int onemklZomatcopy(syclQueue_t device_queue, onemklTranspose trans, int...
  function onemklSomatadd (line 2404) | int onemklSomatadd(syclQueue_t device_queue, onemklTranspose transa, one...
  function onemklDomatadd (line 2412) | int onemklDomatadd(syclQueue_t device_queue, onemklTranspose transa, one...
  function onemklComatadd (line 2420) | int onemklComatadd(syclQueue_t device_queue, onemklTranspose transa, one...
  function onemklZomatadd (line 2428) | int onemklZomatadd(syclQueue_t device_queue, onemklTranspose transa, one...
  function onemklSimatcopy_batch_strided (line 2436) | int onemklSimatcopy_batch_strided(syclQueue_t device_queue, onemklTransp...
  function onemklDimatcopy_batch_strided (line 2444) | int onemklDimatcopy_batch_strided(syclQueue_t device_queue, onemklTransp...
  function onemklCimatcopy_batch_strided (line 2452) | int onemklCimatcopy_batch_strided(syclQueue_t device_queue, onemklTransp...
  function onemklZimatcopy_batch_strided (line 2460) | int onemklZimatcopy_batch_strided(syclQueue_t device_queue, onemklTransp...
  function onemklSomatcopy_batch_strided (line 2468) | int onemklSomatcopy_batch_strided(syclQueue_t device_queue, onemklTransp...
  function onemklDomatcopy_batch_strided (line 2476) | int onemklDomatcopy_batch_strided(syclQueue_t device_queue, onemklTransp...
  function onemklComatcopy_batch_strided (line 2484) | int onemklComatcopy_batch_strided(syclQueue_t device_queue, onemklTransp...
  function onemklZomatcopy_batch_strided (line 2492) | int onemklZomatcopy_batch_strided(syclQueue_t device_queue, onemklTransp...
  function onemklSomatadd_batch_strided (line 2500) | int onemklSomatadd_batch_strided(syclQueue_t device_queue, onemklTranspo...
  function onemklDomatadd_batch_strided (line 2508) | int onemklDomatadd_batch_strided(syclQueue_t device_queue, onemklTranspo...
  function onemklComatadd_batch_strided (line 2516) | int onemklComatadd_batch_strided(syclQueue_t device_queue, onemklTranspo...
  function onemklZomatadd_batch_strided (line 2524) | int onemklZomatadd_batch_strided(syclQueue_t device_queue, onemklTranspo...
  function onemklSpotrf (line 2533) | int onemklSpotrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, f...
  function onemklDpotrf (line 2541) | int onemklDpotrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, d...
  function onemklCpotrf (line 2549) | int onemklCpotrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, f...
  function onemklZpotrf (line 2557) | int onemklZpotrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, d...
  function onemklSpotrs (line 2565) | int onemklSpotrs(syclQueue_t device_queue, onemklUplo uplo, int64_t n, i...
  function onemklDpotrs (line 2573) | int onemklDpotrs(syclQueue_t device_queue, onemklUplo uplo, int64_t n, i...
  function onemklCpotrs (line 2581) | int onemklCpotrs(syclQueue_t device_queue, onemklUplo uplo, int64_t n, i...
  function onemklZpotrs (line 2589) | int onemklZpotrs(syclQueue_t device_queue, onemklUplo uplo, int64_t n, i...
  function onemklSpotri (line 2597) | int onemklSpotri(syclQueue_t device_queue, onemklUplo uplo, int64_t n, f...
  function onemklDpotri (line 2605) | int onemklDpotri(syclQueue_t device_queue, onemklUplo uplo, int64_t n, d...
  function onemklCpotri (line 2613) | int onemklCpotri(syclQueue_t device_queue, onemklUplo uplo, int64_t n, f...
  function onemklZpotri (line 2621) | int onemklZpotri(syclQueue_t device_queue, onemklUplo uplo, int64_t n, d...
  function onemklStrtri (line 2629) | int onemklStrtri(syclQueue_t device_queue, onemklUplo uplo, onemklDiag d...
  function onemklDtrtri (line 2637) | int onemklDtrtri(syclQueue_t device_queue, onemklUplo uplo, onemklDiag d...
  function onemklCtrtri (line 2645) | int onemklCtrtri(syclQueue_t device_queue, onemklUplo uplo, onemklDiag d...
  function onemklZtrtri (line 2653) | int onemklZtrtri(syclQueue_t device_queue, onemklUplo uplo, onemklDiag d...
  function onemklSgesv (line 2661) | int onemklSgesv(syclQueue_t device_queue, int64_t n, int64_t nrhs, float...
  function onemklDgesv (line 2669) | int onemklDgesv(syclQueue_t device_queue, int64_t n, int64_t nrhs, doubl...
  function onemklCgesv (line 2677) | int onemklCgesv(syclQueue_t device_queue, int64_t n, int64_t nrhs, float...
  function onemklZgesv (line 2685) | int onemklZgesv(syclQueue_t device_queue, int64_t n, int64_t nrhs, doubl...
  function onemklCgebrd (line 2693) | int onemklCgebrd(syclQueue_t device_queue, int64_t m, int64_t n, float _...
  function onemklDgebrd (line 2701) | int onemklDgebrd(syclQueue_t device_queue, int64_t m, int64_t n, double ...
  function onemklSgebrd (line 2709) | int onemklSgebrd(syclQueue_t device_queue, int64_t m, int64_t n, float *...
  function onemklZgebrd (line 2717) | int onemklZgebrd(syclQueue_t device_queue, int64_t m, int64_t n, double ...
  function onemklCgeqrf (line 2725) | int onemklCgeqrf(syclQueue_t device_queue, int64_t m, int64_t n, float _...
  function onemklDgeqrf (line 2733) | int onemklDgeqrf(syclQueue_t device_queue, int64_t m, int64_t n, double ...
  function onemklSgeqrf (line 2741) | int onemklSgeqrf(syclQueue_t device_queue, int64_t m, int64_t n, float *...
  function onemklZgeqrf (line 2749) | int onemklZgeqrf(syclQueue_t device_queue, int64_t m, int64_t n, double ...
  function onemklCgesvd (line 2757) | int onemklCgesvd(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobs...
  function onemklZgesvd (line 2765) | int onemklZgesvd(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobs...
  function onemklDgesvd (line 2773) | int onemklDgesvd(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobs...
  function onemklSgesvd (line 2781) | int onemklSgesvd(syclQueue_t device_queue, onemklJobsvd jobu, onemklJobs...
  function onemklCgesvda_batch_strided (line 2789) | int onemklCgesvda_batch_strided(syclQueue_t device_queue, int64_t *iparm...
  function onemklDgesvda_batch_strided (line 2797) | int onemklDgesvda_batch_strided(syclQueue_t device_queue, int64_t *iparm...
  function onemklSgesvda_batch_strided (line 2805) | int onemklSgesvda_batch_strided(syclQueue_t device_queue, int64_t *iparm...
  function onemklZgesvda_batch_strided (line 2813) | int onemklZgesvda_batch_strided(syclQueue_t device_queue, int64_t *iparm...
  function onemklCgetrf (line 2821) | int onemklCgetrf(syclQueue_t device_queue, int64_t m, int64_t n, float _...
  function onemklDgetrf (line 2829) | int onemklDgetrf(syclQueue_t device_queue, int64_t m, int64_t n, double ...
  function onemklSgetrf (line 2837) | int onemklSgetrf(syclQueue_t device_queue, int64_t m, int64_t n, float *...
  function onemklZgetrf (line 2845) | int onemklZgetrf(syclQueue_t device_queue, int64_t m, int64_t n, double ...
  function onemklCgetrf_batch (line 2853) | int onemklCgetrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n,...
  function onemklDgetrf_batch (line 2861) | int onemklDgetrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n,...
  function onemklSgetrf_batch (line 2869) | int onemklSgetrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n,...
  function onemklZgetrf_batch (line 2877) | int onemklZgetrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n,...
  function onemklCgetrf_batch_strided (line 2885) | int onemklCgetrf_batch_strided(syclQueue_t device_queue, int64_t m, int6...
  function onemklDgetrf_batch_strided (line 2893) | int onemklDgetrf_batch_strided(syclQueue_t device_queue, int64_t m, int6...
  function onemklSgetrf_batch_strided (line 2901) | int onemklSgetrf_batch_strided(syclQueue_t device_queue, int64_t m, int6...
  function onemklZgetrf_batch_strided (line 2909) | int onemklZgetrf_batch_strided(syclQueue_t device_queue, int64_t m, int6...
  function onemklCgetrfnp (line 2917) | int onemklCgetrfnp(syclQueue_t device_queue, int64_t m, int64_t n, float...
  function onemklDgetrfnp (line 2925) | int onemklDgetrfnp(syclQueue_t device_queue, int64_t m, int64_t n, doubl...
  function onemklSgetrfnp (line 2933) | int onemklSgetrfnp(syclQueue_t device_queue, int64_t m, int64_t n, float...
  function onemklZgetrfnp (line 2941) | int onemklZgetrfnp(syclQueue_t device_queue, int64_t m, int64_t n, doubl...
  function onemklCgetrfnp_batch (line 2949) | int onemklCgetrfnp_batch(syclQueue_t device_queue, int64_t *m, int64_t *...
  function onemklDgetrfnp_batch (line 2957) | int onemklDgetrfnp_batch(syclQueue_t device_queue, int64_t *m, int64_t *...
  function onemklSgetrfnp_batch (line 2965) | int onemklSgetrfnp_batch(syclQueue_t device_queue, int64_t *m, int64_t *...
  function onemklZgetrfnp_batch (line 2973) | int onemklZgetrfnp_batch(syclQueue_t device_queue, int64_t *m, int64_t *...
  function onemklCgetrfnp_batch_strided (line 2981) | int onemklCgetrfnp_batch_strided(syclQueue_t device_queue, int64_t m, in...
  function onemklDgetrfnp_batch_strided (line 2989) | int onemklDgetrfnp_batch_strided(syclQueue_t device_queue, int64_t m, in...
  function onemklSgetrfnp_batch_strided (line 2997) | int onemklSgetrfnp_batch_strided(syclQueue_t device_queue, int64_t m, in...
  function onemklZgetrfnp_batch_strided (line 3005) | int onemklZgetrfnp_batch_strided(syclQueue_t device_queue, int64_t m, in...
  function onemklCgetri (line 3013) | int onemklCgetri(syclQueue_t device_queue, int64_t n, float _Complex *a,...
  function onemklDgetri (line 3021) | int onemklDgetri(syclQueue_t device_queue, int64_t n, double *a, int64_t...
  function onemklSgetri (line 3029) | int onemklSgetri(syclQueue_t device_queue, int64_t n, float *a, int64_t ...
  function onemklZgetri (line 3037) | int onemklZgetri(syclQueue_t device_queue, int64_t n, double _Complex *a...
  function onemklCgetrs (line 3045) | int onemklCgetrs(syclQueue_t device_queue, onemklTranspose trans, int64_...
  function onemklDgetrs (line 3053) | int onemklDgetrs(syclQueue_t device_queue, onemklTranspose trans, int64_...
  function onemklSgetrs (line 3061) | int onemklSgetrs(syclQueue_t device_queue, onemklTranspose trans, int64_...
  function onemklZgetrs (line 3069) | int onemklZgetrs(syclQueue_t device_queue, onemklTranspose trans, int64_...
  function onemklCgetrs_batch_strided (line 3077) | int onemklCgetrs_batch_strided(syclQueue_t device_queue, onemklTranspose...
  function onemklDgetrs_batch_strided (line 3085) | int onemklDgetrs_batch_strided(syclQueue_t device_queue, onemklTranspose...
  function onemklSgetrs_batch_strided (line 3093) | int onemklSgetrs_batch_strided(syclQueue_t device_queue, onemklTranspose...
  function onemklZgetrs_batch_strided (line 3101) | int onemklZgetrs_batch_strided(syclQueue_t device_queue, onemklTranspose...
  function onemklCgetrsnp_batch_strided (line 3109) | int onemklCgetrsnp_batch_strided(syclQueue_t device_queue, onemklTranspo...
  function onemklDgetrsnp_batch_strided (line 3117) | int onemklDgetrsnp_batch_strided(syclQueue_t device_queue, onemklTranspo...
  function onemklSgetrsnp_batch_strided (line 3125) | int onemklSgetrsnp_batch_strided(syclQueue_t device_queue, onemklTranspo...
  function onemklZgetrsnp_batch_strided (line 3133) | int onemklZgetrsnp_batch_strided(syclQueue_t device_queue, onemklTranspo...
  function onemklCheev (line 3141) | int onemklCheev(syclQueue_t device_queue, onemklCompz jobz, onemklUplo u...
  function onemklZheev (line 3149) | int onemklZheev(syclQueue_t device_queue, onemklCompz jobz, onemklUplo u...
  function onemklCheevd (line 3157) | int onemklCheevd(syclQueue_t device_queue, onemklJob jobz, onemklUplo up...
  function onemklZheevd (line 3165) | int onemklZheevd(syclQueue_t device_queue, onemklJob jobz, onemklUplo up...
  function onemklCheevx (line 3173) | int onemklCheevx(syclQueue_t device_queue, onemklCompz jobz, onemklRange...
  function onemklZheevx (line 3181) | int onemklZheevx(syclQueue_t device_queue, onemklCompz jobz, onemklRange...
  function onemklChegvd (line 3189) | int onemklChegvd(syclQueue_t device_queue, int64_t itype, onemklJob jobz...
  function onemklZhegvd (line 3197) | int onemklZhegvd(syclQueue_t device_queue, int64_t itype, onemklJob jobz...
  function onemklChegvx (line 3205) | int onemklChegvx(syclQueue_t device_queue, int64_t itype, onemklCompz jo...
  function onemklZhegvx (line 3213) | int onemklZhegvx(syclQueue_t device_queue, int64_t itype, onemklCompz jo...
  function onemklChetrd (line 3221) | int onemklChetrd(syclQueue_t device_queue, onemklUplo uplo, int64_t n, f...
  function onemklZhetrd (line 3229) | int onemklZhetrd(syclQueue_t device_queue, onemklUplo uplo, int64_t n, d...
  function onemklChetrf (line 3237) | int onemklChetrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, f...
  function onemklZhetrf (line 3245) | int onemklZhetrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, d...
  function onemklSorgbr (line 3253) | int onemklSorgbr(syclQueue_t device_queue, onemklGenerate vec, int64_t m...
  function onemklDorgbr (line 3261) | int onemklDorgbr(syclQueue_t device_queue, onemklGenerate vec, int64_t m...
  function onemklDorgqr (line 3269) | int onemklDorgqr(syclQueue_t device_queue, int64_t m, int64_t n, int64_t...
  function onemklSorgqr (line 3277) | int onemklSorgqr(syclQueue_t device_queue, int64_t m, int64_t n, int64_t...
  function onemklDormqr (line 3285) | int onemklDormqr(syclQueue_t device_queue, onemklSide side, onemklTransp...
  function onemklSormqr (line 3293) | int onemklSormqr(syclQueue_t device_queue, onemklSide side, onemklTransp...
  function onemklCsteqr (line 3301) | int onemklCsteqr(syclQueue_t device_queue, onemklCompz compz, int64_t n,...
  function onemklDsteqr (line 3309) | int onemklDsteqr(syclQueue_t device_queue, onemklCompz compz, int64_t n,...
  function onemklSsteqr (line 3317) | int onemklSsteqr(syclQueue_t device_queue, onemklCompz compz, int64_t n,...
  function onemklZsteqr (line 3325) | int onemklZsteqr(syclQueue_t device_queue, onemklCompz compz, int64_t n,...
  function onemklDsyev (line 3333) | int onemklDsyev(syclQueue_t device_queue, onemklCompz jobz, onemklUplo u...
  function onemklSsyev (line 3341) | int onemklSsyev(syclQueue_t device_queue, onemklCompz jobz, onemklUplo u...
  function onemklDsyevd (line 3349) | int onemklDsyevd(syclQueue_t device_queue, onemklJob jobz, onemklUplo up...
  function onemklSsyevd (line 3357) | int onemklSsyevd(syclQueue_t device_queue, onemklJob jobz, onemklUplo up...
  function onemklDsyevx (line 3365) | int onemklDsyevx(syclQueue_t device_queue, onemklCompz jobz, onemklRange...
  function onemklSsyevx (line 3373) | int onemklSsyevx(syclQueue_t device_queue, onemklCompz jobz, onemklRange...
  function onemklDsygvd (line 3381) | int onemklDsygvd(syclQueue_t device_queue, int64_t itype, onemklJob jobz...
  function onemklSsygvd (line 3389) | int onemklSsygvd(syclQueue_t device_queue, int64_t itype, onemklJob jobz...
  function onemklDsygvx (line 3397) | int onemklDsygvx(syclQueue_t device_queue, int64_t itype, onemklCompz jo...
  function onemklSsygvx (line 3405) | int onemklSsygvx(syclQueue_t device_queue, int64_t itype, onemklCompz jo...
  function onemklDsytrd (line 3413) | int onemklDsytrd(syclQueue_t device_queue, onemklUplo uplo, int64_t n, d...
  function onemklSsytrd (line 3421) | int onemklSsytrd(syclQueue_t device_queue, onemklUplo uplo, int64_t n, f...
  function onemklCtrtrs (line 3429) | int onemklCtrtrs(syclQueue_t device_queue, onemklUplo uplo, onemklTransp...
  function onemklDtrtrs (line 3437) | int onemklDtrtrs(syclQueue_t device_queue, onemklUplo uplo, onemklTransp...
  function onemklStrtrs (line 3445) | int onemklStrtrs(syclQueue_t device_queue, onemklUplo uplo, onemklTransp...
  function onemklZtrtrs (line 3453) | int onemklZtrtrs(syclQueue_t device_queue, onemklUplo uplo, onemklTransp...
  function onemklCungbr (line 3461) | int onemklCungbr(syclQueue_t device_queue, onemklGenerate vec, int64_t m...
  function onemklZungbr (line 3469) | int onemklZungbr(syclQueue_t device_queue, onemklGenerate vec, int64_t m...
  function onemklCungqr (line 3477) | int onemklCungqr(syclQueue_t device_queue, int64_t m, int64_t n, int64_t...
  function onemklZungqr (line 3485) | int onemklZungqr(syclQueue_t device_queue, int64_t m, int64_t n, int64_t...
  function onemklCunmqr (line 3493) | int onemklCunmqr(syclQueue_t device_queue, onemklSide side, onemklTransp...
  function onemklZunmqr (line 3501) | int onemklZunmqr(syclQueue_t device_queue, onemklSide side, onemklTransp...
  function onemklSgerqf (line 3509) | int onemklSgerqf(syclQueue_t device_queue, int64_t m, int64_t n, float *...
  function onemklDgerqf (line 3517) | int onemklDgerqf(syclQueue_t device_queue, int64_t m, int64_t n, double ...
  function onemklCgerqf (line 3525) | int onemklCgerqf(syclQueue_t device_queue, int64_t m, int64_t n, float _...
  function onemklZgerqf (line 3533) | int onemklZgerqf(syclQueue_t device_queue, int64_t m, int64_t n, double ...
  function onemklSormrq (line 3541) | int onemklSormrq(syclQueue_t device_queue, onemklSide side, onemklTransp...
  function onemklDormrq (line 3549) | int onemklDormrq(syclQueue_t device_queue, onemklSide side, onemklTransp...
  function onemklCunmrq (line 3557) | int onemklCunmrq(syclQueue_t device_queue, onemklSide side, onemklTransp...
  function onemklZunmrq (line 3565) | int onemklZunmrq(syclQueue_t device_queue, onemklSide side, onemklTransp...
  function onemklSsytrf (line 3573) | int onemklSsytrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, f...
  function onemklDsytrf (line 3581) | int onemklDsytrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, d...
  function onemklCsytrf (line 3589) | int onemklCsytrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, f...
  function onemklZsytrf (line 3597) | int onemklZsytrf(syclQueue_t device_queue, onemklUplo uplo, int64_t n, d...
  function onemklSorgtr (line 3605) | int onemklSorgtr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, f...
  function onemklDorgtr (line 3613) | int onemklDorgtr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, d...
  function onemklCungtr (line 3621) | int onemklCungtr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, f...
  function onemklZungtr (line 3629) | int onemklZungtr(syclQueue_t device_queue, onemklUplo uplo, int64_t n, d...
  function onemklSormtr (line 3637) | int onemklSormtr(syclQueue_t device_queue, onemklSide side, onemklUplo u...
  function onemklDormtr (line 3645) | int onemklDormtr(syclQueue_t device_queue, onemklSide side, onemklUplo u...
  function onemklCunmtr (line 3653) | int onemklCunmtr(syclQueue_t device_queue, onemklSide side, onemklUplo u...
  function onemklZunmtr (line 3661) | int onemklZunmtr(syclQueue_t device_queue, onemklSide side, onemklUplo u...
  function onemklSgels (line 3669) | int onemklSgels(syclQueue_t device_queue, onemklTranspose trans, int64_t...
  function onemklDgels (line 3677) | int onemklDgels(syclQueue_t device_queue, onemklTranspose trans, int64_t...
  function onemklCgels (line 3685) | int onemklCgels(syclQueue_t device_queue, onemklTranspose trans, int64_t...
  function onemklZgels (line 3693) | int onemklZgels(syclQueue_t device_queue, onemklTranspose trans, int64_t...
  function onemklSpotrf_batch (line 3701) | int onemklSpotrf_batch(syclQueue_t device_queue, onemklUplo *uplo, int64...
  function onemklDpotrf_batch (line 3709) | int onemklDpotrf_batch(syclQueue_t device_queue, onemklUplo *uplo, int64...
  function onemklCpotrf_batch (line 3717) | int onemklCpotrf_batch(syclQueue_t device_queue, onemklUplo *uplo, int64...
  function onemklZpotrf_batch (line 3725) | int onemklZpotrf_batch(syclQueue_t device_queue, onemklUplo *uplo, int64...
  function onemklSpotrs_batch (line 3733) | int onemklSpotrs_batch(syclQueue_t device_queue, onemklUplo *uplo, int64...
  function onemklDpotrs_batch (line 3741) | int onemklDpotrs_batch(syclQueue_t device_queue, onemklUplo *uplo, int64...
  function onemklCpotrs_batch (line 3749) | int onemklCpotrs_batch(syclQueue_t device_queue, onemklUplo *uplo, int64...
  function onemklZpotrs_batch (line 3757) | int onemklZpotrs_batch(syclQueue_t device_queue, onemklUplo *uplo, int64...
  function onemklSgeinv_batch (line 3765) | int onemklSgeinv_batch(syclQueue_t device_queue, int64_t *n, float **a, ...
  function onemklDgeinv_batch (line 3773) | int onemklDgeinv_batch(syclQueue_t device_queue, int64_t *n, double **a,...
  function onemklCgeinv_batch (line 3781) | int onemklCgeinv_batch(syclQueue_t device_queue, int64_t *n, float _Comp...
  function onemklZgeinv_batch (line 3789) | int onemklZgeinv_batch(syclQueue_t device_queue, int64_t *n, double _Com...
  function onemklSgetrs_batch (line 3797) | int onemklSgetrs_batch(syclQueue_t device_queue, onemklTranspose *trans,...
  function onemklDgetrs_batch (line 3805) | int onemklDgetrs_batch(syclQueue_t device_queue, onemklTranspose *trans,...
  function onemklCgetrs_batch (line 3813) | int onemklCgetrs_batch(syclQueue_t device_queue, onemklTranspose *trans,...
  function onemklZgetrs_batch (line 3821) | int onemklZgetrs_batch(syclQueue_t device_queue, onemklTranspose *trans,...
  function onemklSgetri_batch (line 3829) | int onemklSgetri_batch(syclQueue_t device_queue, int64_t *n, float **a, ...
  function onemklDgetri_batch (line 3837) | int onemklDgetri_batch(syclQueue_t device_queue, int64_t *n, double **a,...
  function onemklCgetri_batch (line 3845) | int onemklCgetri_batch(syclQueue_t device_queue, int64_t *n, float _Comp...
  function onemklZgetri_batch (line 3853) | int onemklZgetri_batch(syclQueue_t device_queue, int64_t *n, double _Com...
  function onemklSgeqrf_batch (line 3861) | int onemklSgeqrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n,...
  function onemklDgeqrf_batch (line 3869) | int onemklDgeqrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n,...
  function onemklCgeqrf_batch (line 3877) | int onemklCgeqrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n,...
  function onemklZgeqrf_batch (line 3885) | int onemklZgeqrf_batch(syclQueue_t device_queue, int64_t *m, int64_t *n,...
  function onemklSorgqr_batch (line 3893) | int onemklSorgqr_batch(syclQueue_t device_queue, int64_t *m, int64_t *n,...
  function onemklDorgqr_batch (line 3901) | int onemklDorgqr_batch(syclQueue_t device_queue, int64_t *m, int64_t *n,...
  function onemklCungqr_batch (line 3909) | int onemklCungqr_batch(syclQueue_t device_queue, int64_t *m, int64_t *n,...
  function onemklZungqr_batch (line 3917) | int onemklZungqr_batch(syclQueue_t device_queue, int64_t *m, int64_t *n,...
  function onemklSormqr_batch (line 3925) | int onemklSormqr_batch(syclQueue_t device_queue, onemklSide *side, onemk...
  function onemklDormqr_batch (line 3933) | int onemklDormqr_batch(syclQueue_t device_queue, onemklSide *side, onemk...
  function onemklCunmqr_batch (line 3941) | int onemklCunmqr_batch(syclQueue_t device_queue, onemklSide *side, onemk...
  function onemklZunmqr_batch (line 3949) | int onemklZunmqr_batch(syclQueue_t device_queue, onemklSide *side, onemk...
  function onemklStrtrs_batch (line 3957) | int onemklStrtrs_batch(syclQueue_t device_queue, onemklUplo *uplo, onemk...
  function onemklDtrtrs_batch (line 3965) | int onemklDtrtrs_batch(syclQueue_t device_queue, onemklUplo *uplo, onemk...
  function onemklCtrtrs_batch (line 3973) | int onemklCtrtrs_batch(syclQueue_t device_queue, onemklUplo *uplo, onemk...
  function onemklZtrtrs_batch (line 3981) | int onemklZtrtrs_batch(syclQueue_t device_queue, onemklUplo *uplo, onemk...
  function onemklSgels_batch (line 3989) | int onemklSgels_batch(syclQueue_t device_queue, onemklTranspose *trans, ...
  function onemklDgels_batch (line 3997) | int onemklDgels_batch(syclQueue_t device_queue, onemklTranspose *trans, ...
  function onemklCgels_batch (line 4005) | int onemklCgels_batch(syclQueue_t device_queue, onemklTranspose *trans, ...
  function onemklZgels_batch (line 4013) | int onemklZgels_batch(syclQueue_t device_queue, onemklTranspose *trans, ...
  function onemklSpotrf_batch_strided (line 4021) | int onemklSpotrf_batch_strided(syclQueue_t device_queue, onemklUplo uplo...
  function onemklDpotrf_batch_strided (line 4029) | int onemklDpotrf_batch_strided(syclQueue_t device_queue, onemklUplo uplo...
  function onemklCpotrf_batch_strided (line 4037) | int onemklCpotrf_batch_strided(syclQueue_t device_queue, onemklUplo uplo...
  function onemklZpotrf_batch_strided (line 4045) | int onemklZpotrf_batch_strided(syclQueue_t device_queue, onemklUplo uplo...
  function onemklSpotrs_batch_strided (line 4053) | int onemklSpotrs_batch_strided(syclQueue_t device_queue, onemklUplo uplo...
  function onemklDpotrs_batch_strided (line 4061) | int onemklDpotrs_batch_strided(syclQueue_t device_queue, onemklUplo uplo...
  function onemklCpotrs_batch_strided (line 4069) | int onemklCpotrs_batch_strided(syclQueue_t device_queue, onemklUplo uplo...
  function onemklZpotrs_batch_strided (line 4077) | int onemklZpotrs_batch_strided(syclQueue_t device_queue, onemklUplo uplo...
  function onemklSgeqrf_batch_strided (line 4085) | int onemklSgeqrf_batch_strided(syclQueue_t device_queue, int64_t m, int6...
  function onemklDgeqrf_batch_strided (line 4093) | int onemklDgeqrf_batch_strided(syclQueue_t device_queue, int64_t m, int6...
  function onemklCgeqrf_batch_strided (line 4101) | int onemklCgeqrf_batch_strided(syclQueue_t device_queue, int64_t m, int6...
  function onemklZgeqrf_batch_strided (line 4109) | int onemklZgeqrf_batch_strided(syclQueue_t device_queue, int64_t m, int6...
  function onemklSorgqr_batch_strided (line 4117) | int onemklSorgqr_batch_strided(syclQueue_t device_queue, int64_t m, int6...
  function onemklDorgqr_batch_strided (line 4125) | int onemklDorgqr_batch_strided(syclQueue_t device_queue, int64_t m, int6...
  function onemklCungqr_batch_strided (line 4133) | int onemklCungqr_batch_strided(syclQueue_t device_queue, int64_t m, int6...
  function onemklZungqr_batch_strided (line 4141) | int onemklZungqr_batch_strided(syclQueue_t device_queue, int64_t m, int6...
  function onemklSgetri_batch_strided (line 4149) | int onemklSgetri_batch_strided(syclQueue_t device_queue, int64_t n, floa...
  function onemklDgetri_batch_strided (line 4157) | int onemklDgetri_batch_strided(syclQueue_t device_queue, int64_t n, doub...
  function onemklCgetri_batch_strided (line 4165) | int onemklCgetri_batch_strided(syclQueue_t device_queue, int64_t n, floa...
  function onemklZgetri_batch_strided (line 4173) | int onemklZgetri_batch_strided(syclQueue_t device_queue, int64_t n, doub...
  function onemklSgels_batch_strided (line 4181) | int onemklSgels_batch_strided(syclQueue_t device_queue, onemklTranspose ...
  function onemklDgels_batch_strided (line 4189) | int onemklDgels_batch_strided(syclQueue_t device_queue, onemklTranspose ...
  function onemklCgels_batch_strided (line 4197) | int onemklCgels_batch_strided(syclQueue_t device_queue, onemklTranspose ...
  function onemklZgels_batch_strided (line 4205) | int onemklZgels_batch_strided(syclQueue_t device_queue, onemklTranspose ...
  function onemklSgebrd_scratchpad_size (line 4213) | int64_t onemklSgebrd_scratchpad_size(syclQueue_t device_queue, int64_t m...
  function onemklDgebrd_scratchpad_size (line 4219) | int64_t onemklDgebrd_scratchpad_size(syclQueue_t device_queue, int64_t m...
  function onemklCgebrd_scratchpad_size (line 4225) | int64_t onemklCgebrd_scratchpad_size(syclQueue_t device_queue, int64_t m...
  function onemklZgebrd_scratchpad_size (line 4231) | int64_t onemklZgebrd_scratchpad_size(syclQueue_t device_queue, int64_t m...
  function onemklSgels_scratchpad_size (line 4237) | int64_t onemklSgels_scratchpad_size(syclQueue_t device_queue, onemklTran...
  function onemklDgels_scratchpad_size (line 4243) | int64_t onemklDgels_scratchpad_size(syclQueue_t device_queue, onemklTran...
  function onemklCgels_scratchpad_size (line 4249) | int64_t onemklCgels_scratchpad_size(syclQueue_t device_queue, onemklTran...
  function onemklZgels_scratchpad_size (line 4255) | int64_t onemklZgels_scratchpad_size(syclQueue_t device_queue, onemklTran...
  function onemklSgeqrf_scratchpad_size (line 4261) | int64_t onemklSgeqrf_scratchpad_size(syclQueue_t device_queue, int64_t m...
  function onemklDgeqrf_scratchpad_size (line 4267) | int64_t onemklDgeqrf_scratchpad_size(syclQueue_t device_queue, int64_t m...
  function onemklCgeqrf_scratchpad_size (line 4273) | int64_t onemklCgeqrf_scratchpad_size(syclQueue_t device_queue, int64_t m...
  function onemklZgeqrf_scratchpad_size (line 4279) | int64_t onemklZgeqrf_scratchpad_size(syclQueue_t device_queue, int64_t m...
  function onemklSgerqf_scratchpad_size (line 4285) | int64_t onemklSgerqf_scratchpad_size(syclQueue_t device_queue, int64_t m...
  function onemklDgerqf_scratchpad_size (line 4291) | int64_t onemklDgerqf_scratchpad_size(syclQueue_t device_queue, int64_t m...
  function onemklCgerqf_scratchpad_size (line 4297) | int64_t onemklCgerqf_scratchpad_size(syclQueue_t device_queue, int64_t m...
  function onemklZgerqf_scratchpad_size (line 4303) | int64_t onemklZgerqf_scratchpad_size(syclQueue_t device_queue, int64_t m...
  function onemklSgesv_scratchpad_size (line 4309) | int64_t onemklSgesv_scratchpad_size(syclQueue_t device_queue, int64_t n,...
  function onemklDgesv_scratchpad_size (line 4315) | int64_t onemklDgesv_scratchpad_size(syclQueue_t device_queue, int64_t n,...
  function onemklCgesv_scratchpad_size (line 4321) | int64_t onemklCgesv_scratchpad_size(syclQueue_t device_queue, int64_t n,...
  function onemklZgesv_scratchpad_size (line 4327) | int64_t onemklZgesv_scratchpad_size(syclQueue_t device_queue, int64_t n,...
  function onemklSgesvd_scratchpad_size (line 4333) | int64_t onemklSgesvd_scratchpad_size(syclQueue_t device_queue, onemklJob...
  function onemklDgesvd_scratchpad_size (line 4339) | int64_t onemklDgesvd_scratchpad_size(syclQueue_t device_queue, onemklJob...
  function onemklCgesvd_scratchpad_size (line 4345) | int64_t onemklCgesvd_scratchpad_size(syclQueue_t device_queue, onemklJob...
  function onemklZgesvd_scratchpad_size (line 4351) | int64_t onemklZgesvd_scratchpad_size(syclQueue_t device_queue, onemklJob...
  function onemklSgetrf_scratchpad_size (line 4357) | int64_t onemklSgetrf_scratchpad_size(syclQueue_t device_queue, int64_t m...
  function onemklDgetrf_scratchpad_size (line 4363) | int64_t onemklDgetrf_scratchpad_size(syclQueue_t device_queue, int64_t m...
  function onemklCgetrf_scratchpad_size (line 4369) | int64_t onemklCgetrf_scratchpad_size(syclQueue_t device_queue, int64_t m...
  function onemklZgetrf_scratchpad_size (line 4375) | int64_t onemklZgetrf_scratchpad_size(syclQueue_t device_queue, int64_t m...
  function onemklSgetrfnp_scratchpad_size (line 4381) | int64_t onemklSgetrfnp_scratchpad_size(syclQueue_t device_queue, int64_t...
  function onemklDgetrfnp_scratchpad_size (line 4387) | int64_t onemklDgetrfnp_scratchpad_size(syclQueue_t device_queue, int64_t...
  function onemklCgetrfnp_scratchpad_size (line 4393) | int64_t onemklCgetrfnp_scratchpad_size(syclQueue_t device_queue, int64_t...
  function onemklZgetrfnp_scratchpad_size (line 4399) | int64_t onemklZgetrfnp_scratchpad_size(syclQueue_t device_queue, int64_t...
  function onemklSgetri_scratchpad_size (line 4405) | int64_t onemklSgetri_scratchpad_size(syclQueue_t device_queue, int64_t n...
  function onemklDgetri_scratchpad_size (line 4411) | int64_t onemklDgetri_scratchpad_size(syclQueue_t device_queue, int64_t n...
  function onemklCgetri_scratchpad_size (line 4417) | int64_t onemklCgetri_scratchpad_size(syclQueue_t device_queue, int64_t n...
  function onemklZgetri_scratchpad_size (line 4423) | int64_t onemklZgetri_scratchpad_size(syclQueue_t device_queue, int64_t n...
  function onemklSgetrs_scratchpad_size (line 4429) | int64_t onemklSgetrs_scratchpad_size(syclQueue_t device_queue, onemklTra...
  function onemklDgetrs_scratchpad_size (line 4435) | int64_t onemklDgetrs_scratchpad_size(syclQueue_t device_queue, onemklTra...
  function onemklCgetrs_scratchpad_size (line 4441) | int64_t onemklCgetrs_scratchpad_size(syclQueue_t device_queue, onemklTra...
  function onemklZgetrs_scratchpad_size (line 4447) | int64_t onemklZgetrs_scratchpad_size(syclQueue_t device_queue, onemklTra...
  function onemklCheev_scratchpad_size (line 4453) | int64_t onemklCheev_scratchpad_size(syclQueue_t device_queue, onemklComp...
  function onemklZheev_scratchpad_size (line 4459) | int64_t onemklZheev_scratchpad_size(syclQueue_t device_queue, onemklComp...
  function onemklCheevd_scratchpad_size (line 4465) | int64_t onemklCheevd_scratchpad_size(syclQueue_t device_queue, onemklJob...
  function onemklZheevd_scratchpad_size (line 4471) | int64_t onemklZheevd_scratchpad_size(syclQueue_t device_queue, onemklJob...
  function onemklCheevx_scratchpad_size (line 4477) | int64_t onemklCheevx_scratchpad_size(syclQueue_t device_queue, onemklCom...
  function onemklZheevx_scratchpad_size (line 4483) | int64_t onemklZheevx_scratchpad_size(syclQueue_t device_queue, onemklCom...
  function onemklChegvd_scratchpad_size (line 4489) | int64_t onemklChegvd_scratchpad_size(syclQueue_t device_queue, int64_t i...
  function onemklZhegvd_scratchpad_size (line 4495) | int64_t onemklZhegvd_scratchpad_size(syclQueue_t device_queue, int64_t i...
  function onemklChegvx_scratchpad_size (line 4501) | int64_t onemklChegvx_scratchpad_size(syclQueue_t device_queue, int64_t i...
  function onemklZhegvx_scratchpad_size (line 4507) | int64_t onemklZhegvx_scratchpad_size(syclQueue_t device_queue, int64_t i...
  function onemklChetrd_scratchpad_size (line 4513) | int64_t onemklChetrd_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklZhetrd_scratchpad_size (line 4519) | int64_t onemklZhetrd_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklChetrf_scratchpad_size (line 4525) | int64_t onemklChetrf_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklZhetrf_scratchpad_size (line 4531) | int64_t onemklZhetrf_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklSorgbr_scratchpad_size (line 4537) | int64_t onemklSorgbr_scratchpad_size(syclQueue_t device_queue, onemklGen...
  function onemklDorgbr_scratchpad_size (line 4543) | int64_t onemklDorgbr_scratchpad_size(syclQueue_t device_queue, onemklGen...
  function onemklSorgqr_scratchpad_size (line 4549) | int64_t onemklSorgqr_scratchpad_size(syclQueue_t device_queue, int64_t m...
  function onemklDorgqr_scratchpad_size (line 4555) | int64_t onemklDorgqr_scratchpad_size(syclQueue_t device_queue, int64_t m...
  function onemklSorgtr_scratchpad_size (line 4561) | int64_t onemklSorgtr_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklDorgtr_scratchpad_size (line 4567) | int64_t onemklDorgtr_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklSormqr_scratchpad_size (line 4573) | int64_t onemklSormqr_scratchpad_size(syclQueue_t device_queue, onemklSid...
  function onemklDormqr_scratchpad_size (line 4579) | int64_t onemklDormqr_scratchpad_size(syclQueue_t device_queue, onemklSid...
  function onemklSormrq_scratchpad_size (line 4585) | int64_t onemklSormrq_scratchpad_size(syclQueue_t device_queue, onemklSid...
  function onemklDormrq_scratchpad_size (line 4591) | int64_t onemklDormrq_scratchpad_size(syclQueue_t device_queue, onemklSid...
  function onemklSormtr_scratchpad_size (line 4597) | int64_t onemklSormtr_scratchpad_size(syclQueue_t device_queue, onemklSid...
  function onemklDormtr_scratchpad_size (line 4603) | int64_t onemklDormtr_scratchpad_size(syclQueue_t device_queue, onemklSid...
  function onemklSpotrf_scratchpad_size (line 4609) | int64_t onemklSpotrf_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklDpotrf_scratchpad_size (line 4615) | int64_t onemklDpotrf_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklCpotrf_scratchpad_size (line 4621) | int64_t onemklCpotrf_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklZpotrf_scratchpad_size (line 4627) | int64_t onemklZpotrf_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklSpotri_scratchpad_size (line 4633) | int64_t onemklSpotri_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklDpotri_scratchpad_size (line 4639) | int64_t onemklDpotri_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklCpotri_scratchpad_size (line 4645) | int64_t onemklCpotri_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklZpotri_scratchpad_size (line 4651) | int64_t onemklZpotri_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklSpotrs_scratchpad_size (line 4657) | int64_t onemklSpotrs_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklDpotrs_scratchpad_size (line 4663) | int64_t onemklDpotrs_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklCpotrs_scratchpad_size (line 4669) | int64_t onemklCpotrs_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklZpotrs_scratchpad_size (line 4675) | int64_t onemklZpotrs_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklSsteqr_scratchpad_size (line 4681) | int64_t onemklSsteqr_scratchpad_size(syclQueue_t device_queue, onemklCom...
  function onemklDsteqr_scratchpad_size (line 4687) | int64_t onemklDsteqr_scratchpad_size(syclQueue_t device_queue, onemklCom...
  function onemklCsteqr_scratchpad_size (line 4693) | int64_t onemklCsteqr_scratchpad_size(syclQueue_t device_queue, onemklCom...
  function onemklZsteqr_scratchpad_size (line 4699) | int64_t onemklZsteqr_scratchpad_size(syclQueue_t device_queue, onemklCom...
  function onemklSsyev_scratchpad_size (line 4705) | int64_t onemklSsyev_scratchpad_size(syclQueue_t device_queue, onemklComp...
  function onemklDsyev_scratchpad_size (line 4711) | int64_t onemklDsyev_scratchpad_size(syclQueue_t device_queue, onemklComp...
  function onemklSsyevd_scratchpad_size (line 4717) | int64_t onemklSsyevd_scratchpad_size(syclQueue_t device_queue, onemklJob...
  function onemklDsyevd_scratchpad_size (line 4723) | int64_t onemklDsyevd_scratchpad_size(syclQueue_t device_queue, onemklJob...
  function onemklSsyevx_scratchpad_size (line 4729) | int64_t onemklSsyevx_scratchpad_size(syclQueue_t device_queue, onemklCom...
  function onemklDsyevx_scratchpad_size (line 4735) | int64_t onemklDsyevx_scratchpad_size(syclQueue_t device_queue, onemklCom...
  function onemklSsygvd_scratchpad_size (line 4741) | int64_t onemklSsygvd_scratchpad_size(syclQueue_t device_queue, int64_t i...
  function onemklDsygvd_scratchpad_size (line 4747) | int64_t onemklDsygvd_scratchpad_size(syclQueue_t device_queue, int64_t i...
  function onemklSsygvx_scratchpad_size (line 4753) | int64_t onemklSsygvx_scratchpad_size(syclQueue_t device_queue, int64_t i...
  function onemklDsygvx_scratchpad_size (line 4759) | int64_t onemklDsygvx_scratchpad_size(syclQueue_t device_queue, int64_t i...
  function onemklSsytrd_scratchpad_size (line 4765) | int64_t onemklSsytrd_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklDsytrd_scratchpad_size (line 4771) | int64_t onemklDsytrd_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklSsytrf_scratchpad_size (line 4777) | int64_t onemklSsytrf_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklDsytrf_scratchpad_size (line 4783) | int64_t onemklDsytrf_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklCsytrf_scratchpad_size (line 4789) | int64_t onemklCsytrf_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklZsytrf_scratchpad_size (line 4795) | int64_t onemklZsytrf_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklStrtri_scratchpad_size (line 4801) | int64_t onemklStrtri_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklDtrtri_scratchpad_size (line 4807) | int64_t onemklDtrtri_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklCtrtri_scratchpad_size (line 4813) | int64_t onemklCtrtri_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklZtrtri_scratchpad_size (line 4819) | int64_t onemklZtrtri_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklStrtrs_scratchpad_size (line 4825) | int64_t onemklStrtrs_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklDtrtrs_scratchpad_size (line 4831) | int64_t onemklDtrtrs_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklCtrtrs_scratchpad_size (line 4837) | int64_t onemklCtrtrs_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklZtrtrs_scratchpad_size (line 4843) | int64_t onemklZtrtrs_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklCungbr_scratchpad_size (line 4849) | int64_t onemklCungbr_scratchpad_size(syclQueue_t device_queue, onemklGen...
  function onemklZungbr_scratchpad_size (line 4855) | int64_t onemklZungbr_scratchpad_size(syclQueue_t device_queue, onemklGen...
  function onemklCungqr_scratchpad_size (line 4861) | int64_t onemklCungqr_scratchpad_size(syclQueue_t device_queue, int64_t m...
  function onemklZungqr_scratchpad_size (line 4867) | int64_t onemklZungqr_scratchpad_size(syclQueue_t device_queue, int64_t m...
  function onemklCungtr_scratchpad_size (line 4873) | int64_t onemklCungtr_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklZungtr_scratchpad_size (line 4879) | int64_t onemklZungtr_scratchpad_size(syclQueue_t device_queue, onemklUpl...
  function onemklCunmqr_scratchpad_size (line 4885) | int64_t onemklCunmqr_scratchpad_size(syclQueue_t device_queue, onemklSid...
  function onemklZunmqr_scratchpad_size (line 4891) | int64_t onemklZunmqr_scratchpad_size(syclQueue_t device_queue, onemklSid...
  function onemklCunmrq_scratchpad_size (line 4897) | int64_t onemklCunmrq_scratchpad_size(syclQueue_t device_queue, onemklSid...
  function onemklZunmrq_scratchpad_size (line 4903) | int64_t onemklZunmrq_scratchpad_size(syclQueue_t device_queue, onemklSid...
  function onemklCunmtr_scratchpad_size (line 4909) | int64_t onemklCunmtr_scratchpad_size(syclQueue_t device_queue, onemklSid...
  function onemklZunmtr_scratchpad_size (line 4915) | int64_t onemklZunmtr_scratchpad_size(syclQueue_t device_queue, onemklSid...
  function onemklSgeinv_batch_scratchpad_size (line 4921) | int64_t onemklSgeinv_batch_scratchpad_size(syclQueue_t device_queue, int...
  function onemklDgeinv_batch_scratchpad_size (line 4927) | int64_t onemklDgeinv_batch_scratchpad_size(syclQueue_t device_queue, int...
  function onemklCgeinv_batch_scratchpad_size (line 4933) | int64_t onemklCgeinv_batch_scratchpad_size(syclQueue_t device_queue, int...
  function onemklZgeinv_batch_scratchpad_size (line 4939) | int64_t onemklZgeinv_batch_scratchpad_size(syclQueue_t device_queue, int...
  function onemklSgels_batch_scratchpad_size (line 4945) | int64_t onemklSgels_batch_scratchpad_size(syclQueue_t device_queue, onem...
  function onemklDgels_batch_scratchpad_size (line 4951) | int64_t onemklDgels_batch_scratchpad_size(syclQueue_t device_queue, onem...
  function onemklCgels_batch_scratchpad_size (line 4957) | int64_t onemklCgels_batch_scratchpad_size(syclQueue_t device_queue, onem...
  function onemklZgels_batch_scratchpad_size (line 4963) | int64_t onemklZgels_batch_scratchpad_size(syclQueue_t device_queue, onem...
  function onemklSgels_batch_strided_scratchpad_size (line 4969) | int64_t onemklSgels_batch_strided_scratchpad_size(syclQueue_t device_que...
  function onemklDgels_batch_strided_scratchpad_size (line 4975) | int64_t onemklDgels_batch_strided_scratchpad_size(syclQueue_t device_que...
  function onemklCgels_batch_strided_scratchpad_size (line 4981) | int64_t onemklCgels_batch_strided_scratchpad_size(syclQueue_t device_que...
  function onemklZgels_batch_strided_scratchpad_size (line 4987) | int64_t onemklZgels_batch_strided_scratchpad_size(syclQueue_t device_que...
  function onemklSgeqrf_batch_scratchpad_size (line 4993) | int64_t onemklSgeqrf_batch_scratchpad_size(syclQueue_t device_queue, int...
  function onemklDgeqrf_batch_scratchpad_size (line 4999) | int64_t onemklDgeqrf_batch_scratchpad_size(syclQueue_t device_queue, int...
  function onemklCgeqrf_batch_scratchpad_size (line 5005) | int64_t onemklCgeqrf_batch_scratchpad_size(syclQueue_t device_queue, int...
  function onemklZgeqrf_batch_scratchpad_size (line 5011) | int64_t onemklZgeqrf_batch_scratchpad_size(syclQueue_t device_queue, int...
  function onemklSgeqrf_batch_strided_scratchpad_size (line 5017) | int64_t onemklSgeqrf_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklDgeqrf_batch_strided_scratchpad_size (line 5023) | int64_t onemklDgeqrf_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklCgeqrf_batch_strided_scratchpad_size (line 5029) | int64_t onemklCgeqrf_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklZgeqrf_batch_strided_scratchpad_size (line 5035) | int64_t onemklZgeqrf_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklSgesvda_batch_strided_scratchpad_size (line 5041) | int64_t onemklSgesvda_batch_strided_scratchpad_size(syclQueue_t device_q...
  function onemklDgesvda_batch_strided_scratchpad_size (line 5047) | int64_t onemklDgesvda_batch_strided_scratchpad_size(syclQueue_t device_q...
  function onemklCgesvda_batch_strided_scratchpad_size (line 5053) | int64_t onemklCgesvda_batch_strided_scratchpad_size(syclQueue_t device_q...
  function onemklZgesvda_batch_strided_scratchpad_size (line 5059) | int64_t onemklZgesvda_batch_strided_scratchpad_size(syclQueue_t device_q...
  function onemklSgetrf_batch_strided_scratchpad_size (line 5065) | int64_t onemklSgetrf_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklDgetrf_batch_strided_scratchpad_size (line 5071) | int64_t onemklDgetrf_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklCgetrf_batch_strided_scratchpad_size (line 5077) | int64_t onemklCgetrf_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklZgetrf_batch_strided_scratchpad_size (line 5083) | int64_t onemklZgetrf_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklSgetrf_batch_scratchpad_size (line 5089) | int64_t onemklSgetrf_batch_scratchpad_size(syclQueue_t device_queue, int...
  function onemklDgetrf_batch_scratchpad_size (line 5095) | int64_t onemklDgetrf_batch_scratchpad_size(syclQueue_t device_queue, int...
  function onemklCgetrf_batch_scratchpad_size (line 5101) | int64_t onemklCgetrf_batch_scratchpad_size(syclQueue_t device_queue, int...
  function onemklZgetrf_batch_scratchpad_size (line 5107) | int64_t onemklZgetrf_batch_scratchpad_size(syclQueue_t device_queue, int...
  function onemklSgetrfnp_batch_strided_scratchpad_size (line 5113) | int64_t onemklSgetrfnp_batch_strided_scratchpad_size(syclQueue_t device_...
  function onemklDgetrfnp_batch_strided_scratchpad_size (line 5119) | int64_t onemklDgetrfnp_batch_strided_scratchpad_size(syclQueue_t device_...
  function onemklCgetrfnp_batch_strided_scratchpad_size (line 5125) | int64_t onemklCgetrfnp_batch_strided_scratchpad_size(syclQueue_t device_...
  function onemklZgetrfnp_batch_strided_scratchpad_size (line 5131) | int64_t onemklZgetrfnp_batch_strided_scratchpad_size(syclQueue_t device_...
  function onemklSgetrfnp_batch_scratchpad_size (line 5137) | int64_t onemklSgetrfnp_batch_scratchpad_size(syclQueue_t device_queue, i...
  function onemklDgetrfnp_batch_scratchpad_size (line 5143) | int64_t onemklDgetrfnp_batch_scratchpad_size(syclQueue_t device_queue, i...
  function onemklCgetrfnp_batch_scratchpad_size (line 5149) | int64_t onemklCgetrfnp_batch_scratchpad_size(syclQueue_t device_queue, i...
  function onemklZgetrfnp_batch_scratchpad_size (line 5155) | int64_t onemklZgetrfnp_batch_scratchpad_size(syclQueue_t device_queue, i...
  function onemklSgetri_batch_scratchpad_size (line 5161) | int64_t onemklSgetri_batch_scratchpad_size(syclQueue_t device_queue, int...
  function onemklDgetri_batch_scratchpad_size (line 5167) | int64_t onemklDgetri_batch_scratchpad_size(syclQueue_t device_queue, int...
  function onemklCgetri_batch_scratchpad_size (line 5173) | int64_t onemklCgetri_batch_scratchpad_size(syclQueue_t device_queue, int...
  function onemklZgetri_batch_scratchpad_size (line 5179) | int64_t onemklZgetri_batch_scratchpad_size(syclQueue_t device_queue, int...
  function onemklSgetri_batch_strided_scratchpad_size (line 5185) | int64_t onemklSgetri_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklDgetri_batch_strided_scratchpad_size (line 5191) | int64_t onemklDgetri_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklCgetri_batch_strided_scratchpad_size (line 5197) | int64_t onemklCgetri_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklZgetri_batch_strided_scratchpad_size (line 5203) | int64_t onemklZgetri_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklSgetrs_batch_scratchpad_size (line 5209) | int64_t onemklSgetrs_batch_scratchpad_size(syclQueue_t device_queue, one...
  function onemklDgetrs_batch_scratchpad_size (line 5215) | int64_t onemklDgetrs_batch_scratchpad_size(syclQueue_t device_queue, one...
  function onemklCgetrs_batch_scratchpad_size (line 5221) | int64_t onemklCgetrs_batch_scratchpad_size(syclQueue_t device_queue, one...
  function onemklZgetrs_batch_scratchpad_size (line 5227) | int64_t onemklZgetrs_batch_scratchpad_size(syclQueue_t device_queue, one...
  function onemklSgetrs_batch_strided_scratchpad_size (line 5233) | int64_t onemklSgetrs_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklDgetrs_batch_strided_scratchpad_size (line 5239) | int64_t onemklDgetrs_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklCgetrs_batch_strided_scratchpad_size (line 5245) | int64_t onemklCgetrs_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklZgetrs_batch_strided_scratchpad_size (line 5251) | int64_t onemklZgetrs_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklSgetrsnp_batch_strided_scratchpad_size (line 5257) | int64_t onemklSgetrsnp_batch_strided_scratchpad_size(syclQueue_t device_...
  function onemklDgetrsnp_batch_strided_scratchpad_size (line 5263) | int64_t onemklDgetrsnp_batch_strided_scratchpad_size(syclQueue_t device_...
  function onemklCgetrsnp_batch_strided_scratchpad_size (line 5269) | int64_t onemklCgetrsnp_batch_strided_scratchpad_size(syclQueue_t device_...
  function onemklZgetrsnp_batch_strided_scratchpad_size (line 5275) | int64_t onemklZgetrsnp_batch_strided_scratchpad_size(syclQueue_t device_...
  function onemklSorgqr_batch_scratchpad_size (line 5281) | int64_t onemklSorgqr_batch_scratchpad_size(syclQueue_t device_queue, int...
  function onemklDorgqr_batch_scratchpad_size (line 5287) | int64_t onemklDorgqr_batch_scratchpad_size(syclQueue_t device_queue, int...
  function onemklSorgqr_batch_strided_scratchpad_size (line 5293) | int64_t onemklSorgqr_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklDorgqr_batch_strided_scratchpad_size (line 5299) | int64_t onemklDorgqr_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklSormqr_batch_scratchpad_size (line 5305) | int64_t onemklSormqr_batch_scratchpad_size(syclQueue_t device_queue, one...
  function onemklDormqr_batch_scratchpad_size (line 5311) | int64_t onemklDormqr_batch_scratchpad_size(syclQueue_t device_queue, one...
  function onemklSpotrf_batch_scratchpad_size (line 5317) | int64_t onemklSpotrf_batch_scratchpad_size(syclQueue_t device_queue, one...
  function onemklDpotrf_batch_scratchpad_size (line 5323) | int64_t onemklDpotrf_batch_scratchpad_size(syclQueue_t device_queue, one...
  function onemklCpotrf_batch_scratchpad_size (line 5329) | int64_t onemklCpotrf_batch_scratchpad_size(syclQueue_t device_queue, one...
  function onemklZpotrf_batch_scratchpad_size (line 5335) | int64_t onemklZpotrf_batch_scratchpad_size(syclQueue_t device_queue, one...
  function onemklSpotrf_batch_strided_scratchpad_size (line 5341) | int64_t onemklSpotrf_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklDpotrf_batch_strided_scratchpad_size (line 5347) | int64_t onemklDpotrf_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklCpotrf_batch_strided_scratchpad_size (line 5353) | int64_t onemklCpotrf_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklZpotrf_batch_strided_scratchpad_size (line 5359) | int64_t onemklZpotrf_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklSpotrs_batch_scratchpad_size (line 5365) | int64_t onemklSpotrs_batch_scratchpad_size(syclQueue_t device_queue, one...
  function onemklDpotrs_batch_scratchpad_size (line 5371) | int64_t onemklDpotrs_batch_scratchpad_size(syclQueue_t device_queue, one...
  function onemklCpotrs_batch_scratchpad_size (line 5377) | int64_t onemklCpotrs_batch_scratchpad_size(syclQueue_t device_queue, one...
  function onemklZpotrs_batch_scratchpad_size (line 5383) | int64_t onemklZpotrs_batch_scratchpad_size(syclQueue_t device_queue, one...
  function onemklSpotrs_batch_strided_scratchpad_size (line 5389) | int64_t onemklSpotrs_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklDpotrs_batch_strided_scratchpad_size (line 5395) | int64_t onemklDpotrs_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklCpotrs_batch_strided_scratchpad_size (line 5401) | int64_t onemklCpotrs_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklZpotrs_batch_strided_scratchpad_size (line 5407) | int64_t onemklZpotrs_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklStrtrs_batch_scratchpad_size (line 5413) | int64_t onemklStrtrs_batch_scratchpad_size(syclQueue_t device_queue, one...
  function onemklDtrtrs_batch_scratchpad_size (line 5419) | int64_t onemklDtrtrs_batch_scratchpad_size(syclQueue_t device_queue, one...
  function onemklCtrtrs_batch_scratchpad_size (line 5425) | int64_t onemklCtrtrs_batch_scratchpad_size(syclQueue_t device_queue, one...
  function onemklZtrtrs_batch_scratchpad_size (line 5431) | int64_t onemklZtrtrs_batch_scratchpad_size(syclQueue_t device_queue, one...
  function onemklCungqr_batch_scratchpad_size (line 5437) | int64_t onemklCungqr_batch_scratchpad_size(syclQueue_t device_queue, int...
  function onemklZungqr_batch_scratchpad_size (line 5443) | int64_t onemklZungqr_batch_scratchpad_size(syclQueue_t device_queue, int...
  function onemklCungqr_batch_strided_scratchpad_size (line 5449) | int64_t onemklCungqr_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklZungqr_batch_strided_scratchpad_size (line 5455) | int64_t onemklZungqr_batch_strided_scratchpad_size(syclQueue_t device_qu...
  function onemklCunmqr_batch_scratchpad_size (line 5461) | int64_t onemklCunmqr_batch_scratchpad_size(syclQueue_t device_queue, one...
  function onemklZunmqr_batch_scratchpad_size (line 5467) | int64_t onemklZunmqr_batch_scratchpad_size(syclQueue_t device_queue, one...
  function onemklXsparse_init_matrix_handle (line 5474) | int onemklXsparse_init_matrix_handle(matrix_handle_t *p_spMat) {
  function onemklXsparse_release_matrix_handle (line 5479) | int onemklXsparse_release_matrix_handle(syclQueue_t device_queue, matrix...
  function onemklSsparse_set_csr_data (line 5487) | int onemklSsparse_set_csr_data(syclQueue_t device_queue, matrix_handle_t...
  function onemklSsparse_set_csr_data_64 (line 5495) | int onemklSsparse_set_csr_data_64(syclQueue_t device_queue, matrix_handl...
  function onemklDsparse_set_csr_data (line 5503) | int onemklDsparse_set_csr_data(syclQueue_t device_queue, matrix_handle_t...
  function onemklDsparse_set_csr_data_64 (line 5511) | int onemklDsparse_set_csr_data_64(syclQueue_t device_queue, matrix_handl...
  function onemklCsparse_set_csr_data (line 5519) | int onemklCsparse_set_csr_data(syclQueue_t device_queue, matrix_handle_t...
  function onemklCsparse_set_csr_data_64 (line 5527) | int onemklCsparse_set_csr_data_64(syclQueue_t device_queue, matrix_handl...
  function onemklZsparse_set_csr_data (line 5535) | int onemklZsparse_set_csr_data(syclQueue_t device_queue, matrix_handle_t...
  function onemklZsparse_set_csr_data_64 (line 5543) | int onemklZsparse_set_csr_data_64(syclQueue_t device_queue, matrix_handl...
  function onemklSsparse_set_coo_data (line 5551) | int onemklSsparse_set_coo_data(syclQueue_t device_queue, matrix_handle_t...
  function onemklSsparse_set_coo_data_64 (line 5559) | int onemklSsparse_set_coo_data_64(syclQueue_t device_queue, matrix_handl...
  function onemklDsparse_set_coo_data (line 5567) | int onemklDsparse_set_coo_data(syclQueue_t device_queue, matrix_handle_t...
  function onemklDsparse_set_coo_data_64 (line 5575) | int onemklDsparse_set_coo_data_64(syclQueue_t device_queue, matrix_handl...
  function onemklCsparse_set_coo_data (line 5583) | int onemklCsparse_set_coo_data(syclQueue_t device_queue, matrix_handle_t...
  function onemklCsparse_set_coo_data_64 (line 5591) | int onemklCsparse_set_coo_data_64(syclQueue_t device_queue, matrix_handl...
  function onemklZsparse_set_coo_data (line 5599) | int onemklZsparse_set_coo_data(syclQueue_t device_queue, matrix_handle_t...
  function onemklZsparse_set_coo_data_64 (line 5607) | int onemklZsparse_set_coo_data_64(syclQueue_t device_queue, matrix_handl...
  function onemklXsparse_init_matmat_descr (line 5615) | int onemklXsparse_init_matmat_descr(matmat_descr_t *p_desc) {
  function onemklXsparse_release_matmat_descr (line 5620) | int onemklXsparse_release_matmat_descr(matmat_descr_t *p_desc) {
  function onemklXsparse_init_omatconvert_descr (line 5625) | int onemklXsparse_init_omatconvert_descr(syclQueue_t device_queue, omatc...
  function onemklXsparse_release_omatconvert_descr (line 5633) | int onemklXsparse_release_omatconvert_descr(syclQueue_t device_queue, om...
  function onemklXsparse_init_omatadd_descr (line 5641) | int onemklXsparse_init_omatadd_descr(syclQueue_t device_queue, omatadd_d...
  function onemklXsparse_release_omatadd_descr (line 5649) | int onemklXsparse_release_omatadd_descr(syclQueue_t device_queue, omatad...
  function onemklXsparse_omatcopy (line 5657) | int onemklXsparse_omatcopy(syclQueue_t device_queue, onemklTranspose tra...
  function onemklXsparse_sort_matrix (line 5665) | int onemklXsparse_sort_matrix(syclQueue_t device_queue, matrix_handle_t ...
  function onemklSsparse_update_diagonal_values (line 5673) | int onemklSsparse_update_diagonal_values(syclQueue_t device_queue, matri...
  function onemklDsparse_update_diagonal_values (line 5681) | int onemklDsparse_update_diagonal_values(syclQueue_t device_queue, matri...
  function onemklCsparse_update_diagonal_values (line 5689) | int onemklCsparse_update_diagonal_values(syclQueue_t device_queue, matri...
  function onemklZsparse_update_diagonal_values (line 5697) | int onemklZsparse_update_diagonal_values(syclQueue_t device_queue, matri...
  function onemklXsparse_optimize_gemv (line 5705) | int onemklXsparse_optimize_gemv(syclQueue_t device_queue, onemklTranspos...
  function onemklXsparse_optimize_trmv (line 5713) | int onemklXsparse_optimize_trmv(syclQueue_t device_queue, onemklUplo upl...
  function onemklXsparse_optimize_trsv (line 5721) | int onemklXsparse_optimize_trsv(syclQueue_t device_queue, onemklUplo upl...
  function onemklXsparse_optimize_gemm (line 5729) | int onemklXsparse_optimize_gemm(syclQueue_t device_queue, onemklTranspos...
  function onemklXsparse_optimize_gemm_advanced (line 5737) | int onemklXsparse_optimize_gemm_advanced(syclQueue_t device_queue, onemk...
  function onemklXsparse_optimize_trsm (line 5745) | int onemklXsparse_optimize_trsm(syclQueue_t device_queue, onemklUplo upl...
  function onemklXsparse_optimize_trsm_advanced (line 5753) | int onemklXsparse_optimize_trsm_advanced(syclQueue_t device_queue, onemk...
  function onemklSsparse_gemv (line 5761) | int onemklSsparse_gemv(syclQueue_t device_queue, onemklTranspose opA, fl...
  function onemklDsparse_gemv (line 5769) | int onemklDsparse_gemv(syclQueue_t device_queue, onemklTranspose opA, do...
  function onemklCsparse_gemv (line 5777) | int onemklCsparse_gemv(syclQueue_t device_queue, onemklTranspose opA, fl...
  function onemklZsparse_gemv (line 5785) | int onemklZsparse_gemv(syclQueue_t device_queue, onemklTranspose opA, do...
  function onemklSsparse_gemvdot (line 5793) | int onemklSsparse_gemvdot(syclQueue_t device_queue, onemklTranspose opA,...
  function onemklDsparse_gemvdot (line 5801) | int onemklDsparse_gemvdot(syclQueue_t device_queue, onemklTranspose opA,...
  function onemklCsparse_gemvdot (line 5809) | int onemklCsparse_gemvdot(syclQueue_t device_queue, onemklTranspose opA,...
  function onemklZsparse_gemvdot (line 5817) | int onemklZsparse_gemvdot(syclQueue_t device_queue, onemklTranspose opA,...
  function onemklSsparse_symv (line 5825) | int onemklSsparse_symv(syclQueue_t device_queue, onemklUplo uplo_val, fl...
  function onemklDsparse_symv (line 5833) | int onemklDsparse_symv(syclQueue_t device_queue, onemklUplo uplo_val, do...
  function onemklCsparse_symv (line 5841) | int onemklCsparse_symv(syclQueue_t device_queue, onemklUplo uplo_val, fl...
  function onemklZsparse_symv (line 5849) | int onemklZsparse_symv(syclQueue_t device_queue, onemklUplo uplo_val, do...
  function onemklSsparse_trmv (line 5857) | int onemklSsparse_trmv(syclQueue_t device_queue, onemklUplo uplo_val, on...
  function onemklDsparse_trmv (line 5865) | int onemklDsparse_trmv(syclQueue_t device_queue, onemklUplo uplo_val, on...
  function onemklCsparse_trmv (line 5873) | int onemklCsparse_trmv(syclQueue_t device_queue, onemklUplo uplo_val, on...
  function onemklZsparse_trmv (line 5881) | int onemklZsparse_trmv(syclQueue_t device_queue, onemklUplo uplo_val, on...
  function onemklSsparse_trsv (line 5889) | int onemklSsparse_trsv(syclQueue_t device_queue, onemklUplo uplo_val, on...
  function onemklDsparse_trsv (line 5897) | int onemklDsparse_trsv(syclQueue_t device_queue, onemklUplo uplo_val, on...
  function onemklCsparse_trsv (line 5905) | int onemklCsparse_trsv(syclQueue_t device_queue, onemklUplo uplo_val, on...
  function onemklZsparse_trsv (line 5913) | int onemklZsparse_trsv(syclQueue_t device_queue, onemklUplo uplo_val, on...
  function onemklSsparse_gemm (line 5921) | int onemklSsparse_gemm(syclQueue_t device_queue, onemklLayout layout_val...
  function onemklDsparse_gemm (line 5929) | int onemklDsparse_gemm(syclQueue_t device_queue, onemklLayout layout_val...
  function onemklCsparse_gemm (line 5937) | int onemklCsparse_gemm(syclQueue_t device_queue, onemklLayout layout_val...
  function onemklZsparse_gemm (line 5945) | int onemklZsparse_gemm(syclQueue_t device_queue, onemklLayout layout_val...
  function onemklSsparse_trsm (line 5953) | int onemklSsparse_trsm(syclQueue_t device_queue, onemklLayout layout_val...
  function onemklDsparse_trsm (line 5961) | int onemklDsparse_trsm(syclQueue_t device_queue, onemklLayout layout_val...
  function onemklCsparse_trsm (line 5969) | int onemklCsparse_trsm(syclQueue_t device_queue, onemklLayout layout_val...
  function onemklZsparse_trsm (line 5977) | int onemklZsparse_trsm(syclQueue_t device_queue, onemklLayout layout_val...
  function onemklXsparse_set_matmat_data (line 5985) | int onemklXsparse_set_matmat_data(matmat_descr_t descr, onemklMatrixView...
  function onemklSsparse_matmatd (line 5990) | int onemklSsparse_matmatd(syclQueue_t device_queue, onemklLayout c_layou...
  function onemklDsparse_matmatd (line 5998) | int onemklDsparse_matmatd(syclQueue_t device_queue, onemklLayout c_layou...
  function onemklCsparse_matmatd (line 6006) | int onemklCsparse_matmatd(syclQueue_t device_queue, onemklLayout c_layou...
  function onemklZsparse_matmatd (line 6014) | int onemklZsparse_matmatd(syclQueue_t device_queue, onemklLayout c_layou...
  function onemklXsparse_matmat (line 6022) | int onemklXsparse_matmat(syclQueue_t device_queue, matrix_handle_t A, ma...
  function onemklDestroy (line 6034) | int onemklDestroy() {

FILE: deps/src/onemkl.h
  type onemklTranspose (line 13) | typedef enum {
  type onemklUplo (line 19) | typedef enum {
  type onemklDiag (line 24) | typedef enum {
  type onemklSide (line 29) | typedef enum {
  type onemklOffset (line 34) | typedef enum {
  type onemklJob (line 41) | typedef enum {
  type onemklGenerate (line 50) | typedef enum {
  type onemklCompz (line 57) | typedef enum {
  type onemklDirect (line 63) | typedef enum {
  type onemklStorev (line 68) | typedef enum {
  type onemklRangev (line 73) | typedef enum {
  type onemklOrder (line 79) | typedef enum {
  type onemklJobsvd (line 84) | typedef enum {
  type onemklLayout (line 91) | typedef enum {
  type onemklIndex (line 96) | typedef enum {
  type onemklProperty (line 102) | typedef enum {
  type onemklMatrixView (line 107) | typedef enum {
  type onemklMatmatRequest (line 111) | typedef enum {
  type onemklOmatconvertAlg (line 123) | typedef enum {
  type onemklOmataddAlg (line 127) | typedef enum {
  type matrix_handle (line 131) | struct matrix_handle
  type matrix_handle (line 132) | struct matrix_handle
  type matmat_descr (line 134) | struct matmat_descr
  type matmat_descr (line 135) | struct matmat_descr
  type omatconvert_descr (line 137) | struct omatconvert_descr
  type omatconvert_descr (line 138) | struct omatconvert_descr
  type omatadd_descr (line 140) | struct omatadd_descr
  type omatadd_descr (line 141) | struct omatadd_descr

FILE: deps/src/onemkl_dft.cpp
  type onemklDftDescriptor_st (line 13) | struct onemklDftDescriptor_st {
  function precision (line 19) | static inline precision to_prec(onemklDftPrecision p) {
  function domain (line 23) | static inline domain to_dom(onemklDftDomain d) {
  function allocate_descriptor (line 28) | static int allocate_descriptor(onemklDftDescriptor_t *out, precision p, ...
  function onemklDftCreate1D (line 49) | int onemklDftCreate1D(onemklDftDescriptor_t *desc,
  function onemklDftCreateND (line 57) | int onemklDftCreateND(onemklDftDescriptor_t *desc,
  function onemklDftDestroy (line 67) | int onemklDftDestroy(onemklDftDescriptor_t desc) {
  function onemklDftCommit (line 86) | int onemklDftCommit(onemklDftDescriptor_t desc, syclQueue_t queue) {
  function config_param (line 108) | static inline config_param to_param(onemklDftConfigParam p) {
  function config_value (line 134) | static inline config_value to_cvalue(onemklDftConfigValue v) {
  function onemklDftConfigValue (line 151) | static inline onemklDftConfigValue from_cvalue(config_value cv) {
  function onemklDftSetValueInt64 (line 186) | int onemklDftSetValueInt64(onemklDftDescriptor_t desc, onemklDftConfigPa...
  function onemklDftSetValueDouble (line 191) | int onemklDftSetValueDouble(onemklDftDescriptor_t desc, onemklDftConfigP...
  function onemklDftSetValueInt64Array (line 196) | int onemklDftSetValueInt64Array(onemklDftDescriptor_t desc, onemklDftCon...
  function onemklDftSetValueConfigValue (line 201) | int onemklDftSetValueConfigValue(onemklDftDescriptor_t desc, onemklDftCo...
  function onemklDftGetValueInt64 (line 206) | int onemklDftGetValueInt64(onemklDftDescriptor_t desc, onemklDftConfigPa...
  function onemklDftGetValueDouble (line 211) | int onemklDftGetValueDouble(onemklDftDescriptor_t desc, onemklDftConfigP...
  function onemklDftGetValueInt64Array (line 216) | int onemklDftGetValueInt64Array(onemklDftDescriptor_t desc, onemklDftCon...
  function onemklDftGetValueConfigValue (line 226) | int onemklDftGetValueConfigValue(onemklDftDescriptor_t desc, onemklDftCo...
  function onemklDftComputeForward (line 252) | int onemklDftComputeForward(onemklDftDescriptor_t desc, void *inout) {
  function onemklDftComputeForwardOutOfPlace (line 276) | int onemklDftComputeForwardOutOfPlace(onemklDftDescriptor_t desc, void *...
  function onemklDftComputeBackward (line 305) | int onemklDftComputeBackward(onemklDftDescriptor_t desc, void *inout) {
  function onemklDftComputeBackwardOutOfPlace (line 329) | int onemklDftComputeBackwardOutOfPlace(onemklDftDescriptor_t desc, void ...
  function make_buffer (line 363) | static inline sycl::buffer<T,1> make_buffer(T *ptr, int64_t n) {
  function get_element_count (line 368) | static int64_t get_element_count(onemklDftDescriptor_t desc) {
  function onemklDftComputeForwardBuffer (line 372) | int onemklDftComputeForwardBuffer(onemklDftDescriptor_t desc, void *inou...
  function onemklDftComputeForwardOutOfPlaceBuffer (line 384) | int onemklDftComputeForwardOutOfPlaceBuffer(onemklDftDescriptor_t desc, ...
  function onemklDftComputeBackwardBuffer (line 396) | int onemklDftComputeBackwardBuffer(onemklDftDescriptor_t desc, void *ino...
  function onemklDftComputeBackwardOutOfPlaceBuffer (line 408) | int onemklDftComputeBackwardOutOfPlaceBuffer(onemklDftDescriptor_t desc,...
  function onemklDftQueryParamIndices (line 426) | int onemklDftQueryParamIndices(int64_t *out, int64_t n) {

FILE: deps/src/onemkl_dft.h
  type onemklDftPrecision (line 22) | typedef enum {
  type onemklDftDomain (line 28) | typedef enum {
  type onemklDftConfigParam (line 34) | typedef enum {
  type onemklDftConfigValue (line 58) | typedef enum {
  type onemklDftDescriptor_st (line 73) | struct onemklDftDescriptor_st
  type onemklDftDescriptor_st (line 74) | struct onemklDftDescriptor_st

FILE: deps/src/sycl.cpp
  function syclPlatformCreate (line 7) | int syclPlatformCreate(syclPlatform_t *obj,
  function syclPlatformDestroy (line 15) | int syclPlatformDestroy(syclPlatform_t obj) {
  function syclDeviceCreate (line 20) | int syclDeviceCreate(syclDevice_t *obj, syclPlatform_t platform,
  function syclDeviceDestroy (line 28) | int syclDeviceDestroy(syclDevice_t obj) {
  function syclContextCreate (line 33) | int syclContextCreate(syclContext_t *obj, syclDevice_t *devices,
  function syclContextDestroy (line 52) | int syclContextDestroy(syclContext_t obj) {
  function syclQueueCreate (line 57) | int syclQueueCreate(syclQueue_t *obj, syclContext_t context,
  function syclQueueDestroy (line 74) | int syclQueueDestroy(syclQueue_t obj) {
  function syclQueueWait (line 79) | int syclQueueWait(syclQueue_t obj) {
  function syclEventCreate (line 84) | int syclEventCreate(syclEvent_t *obj, syclContext_t context,
  function syclEventDestroy (line 99) | int syclEventDestroy(syclEvent_t obj) {

FILE: deps/src/sycl.h
  type syclPlatform_st (line 11) | struct syclPlatform_st
  type syclDevice_st (line 15) | struct syclDevice_st
  type syclContext_st (line 20) | struct syclContext_st
  type syclQueue_st (line 25) | struct syclQueue_st
  type syclEvent_st (line 31) | struct syclEvent_st

FILE: deps/src/sycl.hpp
  type syclPlatform_st (line 7) | struct syclPlatform_st {
  type syclDevice_st (line 11) | struct syclDevice_st {
  type syclContext_st (line 15) | struct syclContext_st {
  type syclQueue_st (line 19) | struct syclQueue_st {
  type syclEvent_st (line 23) | struct syclEvent_st {
Condensed preview — 134 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (2,000K chars).
[
  {
    "path": ".buildkite/pipeline.yml",
    "chars": 2458,
    "preview": "steps:\n  # Test supported Julia versions\n  - group: \":julia: Julia\"\n    key: \"julia\"\n    steps:\n      - label: \"Julia {{"
  },
  {
    "path": ".github/dependabot.yml",
    "chars": 256,
    "preview": "# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates\nversion: 2\nupda"
  },
  {
    "path": ".github/workflows/CompatHelper.yml",
    "chars": 994,
    "preview": "name: CompatHelper\n\non:\n  schedule:\n    - cron: '0 0 * * *'\n  workflow_dispatch:\n\njobs:\n  CompatHelper:\n    runs-on: ubu"
  },
  {
    "path": ".github/workflows/DocsCleanup.yml",
    "chars": 746,
    "preview": "name: Doc Preview Cleanup\n\non:\n  pull_request:\n    types: [closed]\n\njobs:\n  doc-preview-cleanup:\n    runs-on: ubuntu-lat"
  },
  {
    "path": ".github/workflows/Format.yml",
    "chars": 3427,
    "preview": "name: 'Format'\n\non:\n  pull_request_target:\n    paths: ['**/*.jl']\n    types: [opened, synchronize, reopened, ready_for_r"
  },
  {
    "path": ".github/workflows/TagBot.yml",
    "chars": 319,
    "preview": "name: TagBot\n\non:\n  issue_comment:\n    types:\n      - created\n  workflow_dispatch:\n\njobs:\n  TagBot:\n    if: github.event"
  },
  {
    "path": ".github/workflows/ci.yml",
    "chars": 771,
    "preview": "name: CI\n\non:\n  push:\n    branches:\n      - master\n    tags: '*'\n  pull_request:\n    types: [opened, synchronize, reopen"
  },
  {
    "path": ".github/workflows/docs.yml",
    "chars": 661,
    "preview": "name: Documentation\n\non:\n  push:\n    branches:\n      - master\n    tags: '*'\n  pull_request:\n    types: [opened, synchron"
  },
  {
    "path": ".gitignore",
    "chars": 175,
    "preview": "LocalPreferences.toml\nManifest.toml\ndeps/onemkl_blas.cpp\ndeps/onemkl_blas.h\ndeps/onemkl_lapack.cpp\ndeps/onemkl_lapack.h\n"
  },
  {
    "path": "CITATION.cff",
    "chars": 378,
    "preview": "cff-version: 1.2.0\nmessage: \"If you use this software, please cite it as below.\"\nauthors:\n  - family-names: Besard\n    g"
  },
  {
    "path": "LICENSE.md",
    "chars": 1219,
    "preview": "The oneAPI.jl package is licensed under the MIT \"Expat\" License:\n\n> Copyright (c) 2020-present: Julia Computing and othe"
  },
  {
    "path": "Project.toml",
    "chars": 2129,
    "preview": "name = \"oneAPI\"\nuuid = \"8f75cd03-7ff8-4ecb-9b8f-daf728133b1b\"\nauthors = [\"Tim Besard <tim.besard@gmail.com>\", \"Alexis Mo"
  },
  {
    "path": "README.md",
    "chars": 10911,
    "preview": "# oneAPI.jl\n\n*Julia support for the oneAPI programming toolkit.*\n\n[![][doi-img]][doi-url] [![][buildkite-img]][buildkite"
  },
  {
    "path": "codecov.yml",
    "chars": 139,
    "preview": "coverage:\n  ignore:\n    - \"lib/*/lib*.jl\"\n    - \"src/device\"\n    - \"res/\"\n  status:\n    patch: false\n    project: false\n"
  },
  {
    "path": "deps/.clang-format",
    "chars": 50,
    "preview": "---\nIndentWidth: '4'\nMaxEmptyLinesToKeep: '2'\n...\n"
  },
  {
    "path": "deps/.gitignore",
    "chars": 30,
    "preview": "liboneapilib.so\nManifest.toml\n"
  },
  {
    "path": "deps/CMakeLists.txt",
    "chars": 822,
    "preview": "cmake_minimum_required(VERSION 3.13)\n\nset(CMAKE_CXX_STANDARD 17)\nset(CMAKE_CXX_STANDARD_REQUIRED ON)\n\n\nproject(oneAPISup"
  },
  {
    "path": "deps/Project.toml",
    "chars": 632,
    "preview": "[deps]\nCMake_jll = \"3f4e10e2-61f2-5801-8945-23b9d642d0e6\"\nConda = \"8f4d0f93-b110-5947-807f-2305c1781a2d\"\nDates = \"ade2ca"
  },
  {
    "path": "deps/build_ci.jl",
    "chars": 2093,
    "preview": "using Pkg\nPkg.activate(@__DIR__)\nPkg.instantiate()\n\nusing Git, Scratch, Dates\n\noneAPI = Base.UUID(\"8f75cd03-7ff8-4ecb-9b"
  },
  {
    "path": "deps/build_local.jl",
    "chars": 2817,
    "preview": "# build liboneapi_support with C wrappers for C++ APIs\n\nusing Pkg\nPkg.activate(@__DIR__)\nPkg.instantiate()\n\nif haskey(EN"
  },
  {
    "path": "deps/generate_helpers.jl",
    "chars": 6648,
    "preview": "non_parametric_routines = [\"init_matrix_handle\", \"release_matrix_handle\", \"set_matrix_property\",\n\"init_matmat_descr\", \"r"
  },
  {
    "path": "deps/generate_interfaces.jl",
    "chars": 30042,
    "preview": "using oneAPI_Support_Headers_jll\n\ninclude(\"generate_helpers.jl\")\n\ninclude_dir = joinpath(oneAPI_Support_Headers_jll.arti"
  },
  {
    "path": "deps/onemkl_epilogue.cpp",
    "chars": 887,
    "preview": "extern \"C\" int onemklXsparse_matmat(syclQueue_t device_queue, matrix_handle_t A, matrix_handle_t B, matrix_handle_t C, o"
  },
  {
    "path": "deps/onemkl_epilogue.h",
    "chars": 303,
    "preview": "int onemklXsparse_matmat(syclQueue_t device_queue, matrix_handle_t A, matrix_handle_t B,\n                         matrix"
  },
  {
    "path": "deps/onemkl_prologue.cpp",
    "chars": 23382,
    "preview": "#include \"onemkl.h\"\n#include \"sycl.hpp\"\n#include <iostream>\n#include <exception>\n#include <memory>\n#include <oneapi/mkl."
  },
  {
    "path": "deps/onemkl_prologue.h",
    "chars": 6456,
    "preview": "#pragma once\n\n#include \"sycl.h\"\n\n#include <stddef.h>\n#include <stdint.h>\n\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n\n// BLA"
  },
  {
    "path": "deps/src/onemkl.cpp",
    "chars": 407264,
    "preview": "#include \"onemkl.h\"\n#include \"sycl.hpp\"\n#include <iostream>\n#include <exception>\n#include <memory>\n#include <oneapi/mkl."
  },
  {
    "path": "deps/src/onemkl.h",
    "chars": 180898,
    "preview": "#pragma once\n\n#include \"sycl.h\"\n\n#include <stddef.h>\n#include <stdint.h>\n\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n\n// BLA"
  },
  {
    "path": "deps/src/onemkl_dft.cpp",
    "chars": 24744,
    "preview": "#include \"onemkl_dft.h\"\n#include \"sycl.hpp\"  // internal struct definitions\n\n#include <oneapi/mkl/dft.hpp>\n#include <vec"
  },
  {
    "path": "deps/src/onemkl_dft.h",
    "chars": 5107,
    "preview": "#pragma once\n\n#include \"sycl.h\"\n\n#include <stdint.h>\n\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n\n// Return codes (negative "
  },
  {
    "path": "deps/src/sycl.cpp",
    "chars": 3563,
    "preview": "#include \"sycl.hpp\"\n\n#include <sycl/ext/oneapi/backend/level_zero.hpp>\n\n// https://github.com/intel/llvm/blob/sycl/sycl/"
  },
  {
    "path": "deps/src/sycl.h",
    "chars": 1228,
    "preview": "#pragma once\n\n#include <stddef.h>\n\n#include <level_zero/ze_api.h>\n\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n\ntypedef struc"
  },
  {
    "path": "deps/src/sycl.hpp",
    "chars": 305,
    "preview": "#pragma once\n\n#include \"sycl.h\"\n\n#include <sycl/sycl.hpp>\n\nstruct syclPlatform_st {\n    sycl::platform val;\n};\n\nstruct s"
  },
  {
    "path": "docs/Project.toml",
    "chars": 192,
    "preview": "[deps]\nDocumenter = \"e30172f5-a6a5-5a46-863b-614d45cd2de4\"\noneAPI = \"8f75cd03-7ff8-4ecb-9b8f-daf728133b1b\"\n\n[sources]\non"
  },
  {
    "path": "docs/make.jl",
    "chars": 1699,
    "preview": "using Pkg\n\nPkg.develop(PackageSpec(path=joinpath(dirname(@__FILE__), \"..\")))\n# # when first running instantiate\nPkg.inst"
  },
  {
    "path": "docs/src/api/arrays.md",
    "chars": 6222,
    "preview": "# Array Operations\n\nThis page documents the array types and operations provided by oneAPI.jl.\n\n## Array Types\n\n### Host-"
  },
  {
    "path": "docs/src/api/compiler.md",
    "chars": 5808,
    "preview": "# Compiler and Reflection\n\nThis page documents the compiler interface and code reflection tools for oneAPI.jl.\n\n## Code "
  },
  {
    "path": "docs/src/api/context.md",
    "chars": 3391,
    "preview": "# Context and Device Management\n\nThis page documents the API for managing Level Zero drivers, devices, and contexts in o"
  },
  {
    "path": "docs/src/api/kernels.md",
    "chars": 7672,
    "preview": "# Kernel Programming\n\nThis page documents the kernel programming API for writing custom GPU kernels in oneAPI.jl.\n\n## Ke"
  },
  {
    "path": "docs/src/api/memory.md",
    "chars": 7925,
    "preview": "# Memory Management\n\nThis page documents memory management in oneAPI.jl.\n\n## Memory Operations\n\n### `Base.unsafe_copyto!"
  },
  {
    "path": "docs/src/api.md",
    "chars": 1138,
    "preview": "# API Reference\n\nThis page provides an overview of the oneAPI.jl API. For detailed documentation, see the specific API r"
  },
  {
    "path": "docs/src/arrays.md",
    "chars": 1593,
    "preview": "# Array Programming\n\noneAPI.jl provides an array type, `oneArray`, which lives on the GPU. It implements the interface d"
  },
  {
    "path": "docs/src/device.md",
    "chars": 1255,
    "preview": "# Device Intrinsics\n\nWhen writing custom kernels, you have access to a set of device intrinsics that map to underlying h"
  },
  {
    "path": "docs/src/getting_started.md",
    "chars": 1036,
    "preview": "# Getting Started\n\n## Basic Usage\n\nThe most basic usage involves moving data to the GPU using `oneArray` and performing "
  },
  {
    "path": "docs/src/index.md",
    "chars": 1752,
    "preview": "# oneAPI.jl\n\n*Julia support for the oneAPI programming toolkit.*\n\noneAPI.jl provides support for working with the [oneAP"
  },
  {
    "path": "docs/src/installation.md",
    "chars": 4263,
    "preview": "# Installation\n\n## Requirements\n\noneAPI.jl requires:\n- **Julia**: 1.10 or higher\n- **OS**: Linux (recommended) or Window"
  },
  {
    "path": "docs/src/kernels.md",
    "chars": 1572,
    "preview": "# Kernel Programming\n\nFor maximum performance or custom operations not covered by high-level array abstractions, you can"
  },
  {
    "path": "docs/src/level_zero.md",
    "chars": 955,
    "preview": "# Level Zero Interface\n\nThe `oneL0` submodule provides low-level access to the Level Zero API, which gives you fine-grai"
  },
  {
    "path": "docs/src/memory.md",
    "chars": 1530,
    "preview": "# Memory Management\n\nEfficient memory management is crucial for GPU programming. oneAPI.jl provides tools to manage devi"
  },
  {
    "path": "docs/src/onemkl.md",
    "chars": 1303,
    "preview": "# oneMKL Integration\n\noneAPI.jl provides bindings to the Intel oneMKL library, enabling high-performance linear algebra "
  },
  {
    "path": "docs/src/troubleshooting.md",
    "chars": 1439,
    "preview": "# Troubleshooting\n\n## Common Issues\n\n### No devices detected\n\n**Symptom**: `oneAPI.devices()` returns an empty list.\n\n**"
  },
  {
    "path": "docs/src/usage/performance.md",
    "chars": 10015,
    "preview": "# Performance Guide\n\nThis guide provides tips and techniques for optimizing oneAPI.jl applications.\n\n## Quick Wins\n\n### "
  },
  {
    "path": "examples/gemm.jl",
    "chars": 138,
    "preview": "using oneAPI, Test\n\nA = oneArray(rand(Float32, 2, 3))\n\nB = oneArray(rand(Float32, 3, 4))\n\nC = A * B\n\n@test Array(C) ≈ Ar"
  },
  {
    "path": "examples/vadd.jl",
    "chars": 359,
    "preview": "using oneAPI, Test\n\nfunction vadd(a, b, c)\n    i = get_global_id()\n    @inbounds c[i] = a[i] + b[i]\n    return\nend\n\ndims"
  },
  {
    "path": "lib/level-zero/barrier.jl",
    "chars": 326,
    "preview": "export append_barrier!, device_barrier\n\nappend_barrier!(list::ZeCommandList, signal_event=nothing, wait_events::ZeEvent."
  },
  {
    "path": "lib/level-zero/cmdlist.jl",
    "chars": 1973,
    "preview": "# list\n\nexport ZeCommandList, execute!\n\nmutable struct ZeCommandList\n    handle::ze_command_list_handle_t\n\n    context::"
  },
  {
    "path": "lib/level-zero/cmdqueue.jl",
    "chars": 2609,
    "preview": "# queue\n\nexport ZeCommandQueue, synchronize\n\nmutable struct ZeCommandQueue\n    handle::ze_command_queue_handle_t\n\n    co"
  },
  {
    "path": "lib/level-zero/common.jl",
    "chars": 742,
    "preview": "\"\"\"\n    ze_make_version(major::Integer, minor::Integer) -> UInt32\n\n32-bit unsigned integer version number from major and"
  },
  {
    "path": "lib/level-zero/context.jl",
    "chars": 669,
    "preview": "export ZeContext, status\n\nmutable struct ZeContext\n    handle::ze_context_handle_t\n\n    driver::ZeDriver\n\n    function Z"
  },
  {
    "path": "lib/level-zero/copy.jl",
    "chars": 1204,
    "preview": "# copies\n\nexport append_copy!, append_fill!, append_prefetch!, append_advise!\n\nappend_copy!(list::ZeCommandList, dst::Un"
  },
  {
    "path": "lib/level-zero/device.jl",
    "chars": 7190,
    "preview": "export ZeDevice, properties, compute_properties, module_properties, memory_properties, memory_access_properties, cache_p"
  },
  {
    "path": "lib/level-zero/driver.jl",
    "chars": 3114,
    "preview": "export ZeDriver, api_version, properties, ipc_properties, extension_properties\n\nstruct ZeDriver\n    handle::ze_driver_ha"
  },
  {
    "path": "lib/level-zero/error.jl",
    "chars": 4415,
    "preview": "# Error type and decoding functionality\n\nexport ZeError\n\n\nstruct ZeError <: Exception\n    code::ze_result_t\nend\n\nBase.co"
  },
  {
    "path": "lib/level-zero/event.jl",
    "chars": 3274,
    "preview": "# pool\n\nexport ZeEventPool\n\nmutable struct ZeEventPool\n    handle::ze_event_pool_handle_t\n\n    context::ZeContext\n\n    f"
  },
  {
    "path": "lib/level-zero/fence.jl",
    "chars": 992,
    "preview": "# fence\n\nexport ZeFence\n\nmutable struct ZeFence\n    handle::ze_fence_handle_t\n    queue::ZeCommandQueue\n\n    function Ze"
  },
  {
    "path": "lib/level-zero/libze.jl",
    "chars": 252072,
    "preview": "using CEnum: CEnum, @cenum\n\n# outlined functionality to avoid GC frame allocation\n@noinline function throw_api_error(res"
  },
  {
    "path": "lib/level-zero/libze_aliases.jl",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "lib/level-zero/memory.jl",
    "chars": 6974,
    "preview": "# Raw memory management\n\nexport device_alloc, host_alloc, shared_alloc, free, properties, lookup_alloc\n\n\n#\n# untyped buf"
  },
  {
    "path": "lib/level-zero/module.jl",
    "chars": 8711,
    "preview": "\n\nexport ZeModule\n\nmutable struct ZeModule\n    handle::ze_module_handle_t\n\n    context::ZeContext\n    device::ZeDevice\n\n"
  },
  {
    "path": "lib/level-zero/oneL0.jl",
    "chars": 6374,
    "preview": "module oneL0\n\nusing ..APIUtils\n\nusing CEnum\n\nusing Printf\n\nusing Libdl\n\nif Sys.iswindows()\n    const libze_loader = \"ze_"
  },
  {
    "path": "lib/level-zero/pointer.jl",
    "chars": 6406,
    "preview": "# pointer types\n\nexport ZePtr, ZE_NULL, PtrOrZePtr, ZeRef, RefOrZeRef\n\n\n#\n# Device pointer\n#\n\n\"\"\"\n    ZePtr{T}\n\nA memory"
  },
  {
    "path": "lib/level-zero/residency.jl",
    "chars": 327,
    "preview": "export make_resident, evict\n\n\n## memory\n\nfunction make_resident(ctx::ZeContext, dev::ZeDevice, buf::AbstractBuffer, size"
  },
  {
    "path": "lib/level-zero/utils.jl",
    "chars": 1411,
    "preview": "isdebug(group) = Base.CoreLogging.current_logger_for_env(Base.CoreLogging.Debug, group, oneL0) !== nothing\n\n# Registered"
  },
  {
    "path": "lib/mkl/array.jl",
    "chars": 2262,
    "preview": "export oneSparseMatrixCSR, oneSparseMatrixCSC, oneSparseMatrixCOO\n\nabstract type oneAbstractSparseArray{Tv, Ti, N} <: Ab"
  },
  {
    "path": "lib/mkl/fft.jl",
    "chars": 24642,
    "preview": "# oneMKL FFT (DFT) high-level Julia interface\n# Inspired by AMDGPU ROCFFT interface style, adapted to oneMKL DFT C wrapp"
  },
  {
    "path": "lib/mkl/interfaces.jl",
    "chars": 2911,
    "preview": "# interfacing with other packages\n\nusing LinearAlgebra: BlasComplex, BlasFloat, BlasReal, MulAddMul\n\n# legacy methods wi"
  },
  {
    "path": "lib/mkl/linalg.jl",
    "chars": 8885,
    "preview": "# interfacing with LinearAlgebra standard library\n\nimport LinearAlgebra\nusing LinearAlgebra: Transpose, Adjoint,\n       "
  },
  {
    "path": "lib/mkl/oneMKL.jl",
    "chars": 1893,
    "preview": "module oneMKL\n\nusing ..oneAPI\nusing ..oneAPI: unsafe_free!\nusing ..oneL0\n\nusing ..Support\n\nusing ..SYCL\nusing ..SYCL: sy"
  },
  {
    "path": "lib/mkl/utils.jl",
    "chars": 2919,
    "preview": "#\n# Auxiliary\n#\n\nfunction Base.convert(::Type{onemklSide}, side::Char)\n    if side == 'L'\n        return ONEMKL_SIDE_LEF"
  },
  {
    "path": "lib/mkl/wrappers_blas.jl",
    "chars": 49382,
    "preview": "## (GE) general matrix-matrix multiplication batched\nfor (fname, elty) in\n        ((:onemklHgemm_batch, :Float16),\n     "
  },
  {
    "path": "lib/mkl/wrappers_lapack.jl",
    "chars": 29500,
    "preview": "# potrf\nfor (bname, fname, elty) in ((:onemklSpotrf_scratchpad_size, :onemklSpotrf, :Float32),\n                         "
  },
  {
    "path": "lib/mkl/wrappers_sparse.jl",
    "chars": 30009,
    "preview": "# Deferred release queue for sparse matrix handles.\n# Finalizers run on the GC thread, but onemklXsparse_release_matrix_"
  },
  {
    "path": "lib/support/Support.jl",
    "chars": 667,
    "preview": "module Support\n\nusing ..oneAPI\n\nusing ..oneL0\n\nusing ..oneL0:\n  ze_driver_handle_t, ze_device_handle_t, ze_context_handl"
  },
  {
    "path": "lib/support/liboneapi_support.jl",
    "chars": 454474,
    "preview": "using CEnum: CEnum, @cenum\n\nmutable struct syclPlatform_st end\n\nconst syclPlatform_t = Ptr{syclPlatform_st}\n\nfunction sy"
  },
  {
    "path": "lib/sycl/SYCL.jl",
    "chars": 2284,
    "preview": "module SYCL\n\nusing ..oneAPI\n\nusing ..oneL0\n\nusing ..Support\n\nexport syclPlatform, syclDevice, syclContext, syclQueue, sy"
  },
  {
    "path": "lib/utils/APIUtils.jl",
    "chars": 158,
    "preview": "module APIUtils\n\n# helpers that facilitate working with C APIs\nusing GPUToolbox: @checked, @debug_ccall\nexport @checked,"
  },
  {
    "path": "lib/utils/enum.jl",
    "chars": 853,
    "preview": "export @enum_without_prefix\n\n\n## redeclare enum values without a prefix\n\n# this is useful when enum values from an under"
  },
  {
    "path": "res/Project.toml",
    "chars": 181,
    "preview": "[deps]\nClang = \"40e3b903-d033-50b4-a0cc-940c62c95e31\"\nJuliaFormatter = \"98e50ef6-434e-11e9-1051-2b60c6c9e899\"\noneAPI_Lev"
  },
  {
    "path": "res/libze_prologue.jl",
    "chars": 541,
    "preview": "# outlined functionality to avoid GC frame allocation\n@noinline function throw_api_error(res)\n    if res == RESULT_ERROR"
  },
  {
    "path": "res/local.jl",
    "chars": 2525,
    "preview": "## generate preferences for loading a local copy of the oneAPI toolchain\n\n#\n# discovery\n#\n\nimport Libdl\n\nfunction scan_l"
  },
  {
    "path": "res/support.toml",
    "chars": 10680,
    "preview": "[general]\nlibrary_name = \"liboneapi_support\"\noutput_file_path = \"../lib/support/liboneapi_support.jl\"\n\n\n[codegen]\nuse_cc"
  },
  {
    "path": "res/wrap.jl",
    "chars": 4068,
    "preview": "# script to parse oneAPI headers and generate Julia wrappers\n\n\n#\n# Parsing\n#\n\nusing Clang\nusing Clang.Generators\n\nusing "
  },
  {
    "path": "res/ze.toml",
    "chars": 1343,
    "preview": "[general]\nlibrary_name = \"libze_loader\"\noutput_file_path = \"../lib/level-zero/libze.jl\"\nprologue_file_path = \"./libze_pr"
  },
  {
    "path": "src/accumulate.jl",
    "chars": 1005,
    "preview": "import oneAPI\nimport oneAPI: oneArray, oneAPIBackend\nimport AcceleratedKernels as AK\n\n# Use a smaller block size on Inte"
  },
  {
    "path": "src/array.jl",
    "chars": 21006,
    "preview": "export oneArray, oneVector, oneMatrix, oneVecOrMat,\n       is_device, is_shared, is_host\n\n\n## array type\n\nfunction hasfi"
  },
  {
    "path": "src/broadcast.jl",
    "chars": 853,
    "preview": "import Base.Broadcast: BroadcastStyle, Broadcasted\n\nstruct oneArrayStyle{N,B} <: AbstractGPUArrayStyle{N} end\noneArraySt"
  },
  {
    "path": "src/compiler/compilation.jl",
    "chars": 7930,
    "preview": "## gpucompiler interface implementation\n\nstruct oneAPICompilerParams <: AbstractCompilerParams end\nconst oneAPICompilerC"
  },
  {
    "path": "src/compiler/execution.jl",
    "chars": 9574,
    "preview": "export @oneapi, zefunction, kernel_convert\n\n\n## high-level @oneapi interface\n\nconst MACRO_KWARGS = [:launch]\nconst COMPI"
  },
  {
    "path": "src/compiler/reflection.jl",
    "chars": 2663,
    "preview": "# code reflection entry-points\n\n# TODO: get and disassemble the native binary using oneL0\n\n#\n# code_* replacements\n#\n\n# "
  },
  {
    "path": "src/context.jl",
    "chars": 8188,
    "preview": "# context management and global state\n\n# to avoid CUDA-style implicit state, where operations can fail if they are accid"
  },
  {
    "path": "src/device/array.jl",
    "chars": 11602,
    "preview": "# Contiguous on-device arrays\n\nexport oneDeviceArray, oneDeviceVector, oneDeviceMatrix, oneLocalArray\n\n\n## construction\n"
  },
  {
    "path": "src/device/atomics.jl",
    "chars": 892,
    "preview": "# Atomic operation device overrides and fallbacks\n\n# Fallback wrappers for Float32 atomic_inc!/atomic_dec!\n# Intel Level"
  },
  {
    "path": "src/device/quirks.jl",
    "chars": 2743,
    "preview": "macro print_and_throw(args...)\n    quote\n        @println \"ERROR: \" $(args...) \".\"\n        throw(nothing)\n    end\nend\n\n#"
  },
  {
    "path": "src/device/runtime.jl",
    "chars": 858,
    "preview": "# device runtime libraries\n\n\n## Julia library\n\n# reset the runtime cache from global scope, so that any change triggers "
  },
  {
    "path": "src/gpuarrays.jl",
    "chars": 376,
    "preview": "# GPUArrays.jl interface\n\nconst GLOBAL_RNGs = Dict{ZeDevice,GPUArrays.RNG}()\nfunction GPUArrays.default_rng(::Type{<:one"
  },
  {
    "path": "src/indexing.jl",
    "chars": 926,
    "preview": "Base.to_index(::oneArray, I::AbstractArray{Bool}) = findall(I)\n\nif VERSION >= v\"1.11.0-DEV.1157\"\n    Base.to_indices(x::"
  },
  {
    "path": "src/mapreduce.jl",
    "chars": 7903,
    "preview": "## COV_EXCL_START\n\n# TODO\n# - serial version for lower latency\n# - group-stride loop to delay need for second kernel lau"
  },
  {
    "path": "src/memory.jl",
    "chars": 2024,
    "preview": "# memory operations\n\n\"\"\"\n    Base.unsafe_copyto!(ctx::ZeContext, dev::ZeDevice, dst, src, N)\n\nLow-level memory copy oper"
  },
  {
    "path": "src/oneAPI.jl",
    "chars": 2500,
    "preview": "module oneAPI\n\nusing GPUArrays\nusing Adapt\n\nusing GPUCompiler\n\nimport ExprTools\n\nusing SpecialFunctions\n\nimport Preferen"
  },
  {
    "path": "src/oneAPIKernels.jl",
    "chars": 8342,
    "preview": "module oneAPIKernels\n\nusing ..oneAPI\nusing ..oneAPI: @device_override, SPIRVIntrinsics, method_table\n\nimport KernelAbstr"
  },
  {
    "path": "src/pool.jl",
    "chars": 3237,
    "preview": "# Track total allocated GPU memory (device + shared buffers) for proactive GC.\n# This mirrors AMDGPU.jl's approach: trig"
  },
  {
    "path": "src/random.jl",
    "chars": 987,
    "preview": "using Random\n\ngpuarrays_rng() = GPUArrays.default_rng(oneArray)\n\n# GPUArrays in-place\nRandom.rand!(A::oneWrappedArray) ="
  },
  {
    "path": "src/sorting.jl",
    "chars": 258,
    "preview": "Base.sort!(x::oneArray; kwargs...) = (AK.sort!(x; kwargs...); return x)\nBase.sortperm!(ix::oneArray, x::oneArray; kwargs"
  },
  {
    "path": "src/utils.jl",
    "chars": 2456,
    "preview": "\nfunction versioninfo(io::IO=stdout)\n    if Sys.islinux()\n        println(io, \"Binary dependencies:\")\n        for jll in"
  },
  {
    "path": "test/Project.toml",
    "chars": 1139,
    "preview": "[deps]\nAbstractFFTs = \"621f4979-c628-5d54-868e-fcf4e3e8185c\"\nAdapt = \"79e6a3ab-5dfb-504d-930d-738a2a938a0e\"\nDates = \"ade"
  },
  {
    "path": "test/array.jl",
    "chars": 2882,
    "preview": "using LinearAlgebra\nimport Adapt\n\n@testset \"constructors\" begin\n  xs = oneArray{Int}(undef, 2, 3)\n  @test collect(oneArr"
  },
  {
    "path": "test/device/intrinsics.jl",
    "chars": 15382,
    "preview": "@testset \"work items\" begin\n    @on_device get_work_dim() |> sink\n\n    @on_device get_global_size() |> sink\n    @on_devi"
  },
  {
    "path": "test/dummy.ll",
    "chars": 969,
    "preview": "target datalayout = \"e-p:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024\"\ntarget "
  },
  {
    "path": "test/dummy.spt",
    "chars": 496,
    "preview": "119734787 65536 393230 12 0 \n2 Capability Addresses \n2 Capability Kernel \n5 ExtInstImport 1 \"OpenCL.std\"\n3 MemoryModel 1"
  },
  {
    "path": "test/examples.jl",
    "chars": 696,
    "preview": "@testset \"examples\" begin\n\nfunction find_sources(path::String, sources=String[])\n    if isdir(path)\n        for entry in"
  },
  {
    "path": "test/execution.jl",
    "chars": 13402,
    "preview": "import Adapt\n\nusing StaticArrays\n\ndummy() = return\n\n@testset \"@oneapi\" begin\n\n@test_throws UndefVarError @oneapi undefin"
  },
  {
    "path": "test/fft.jl",
    "chars": 3115,
    "preview": "using Test\nusing oneAPI\nusing oneAPI.oneMKL.FFT\nusing AbstractFFTs\nusing FFTW\nusing Random\nRandom.seed!(1234)\n\n# Helper "
  },
  {
    "path": "test/indexing.jl",
    "chars": 3510,
    "preview": "using Test\nusing oneAPI\n\n@testset \"findall\" begin\n    bools1d = oneArray([true, false, true, false, true])\n    @test Arr"
  },
  {
    "path": "test/kernelabstractions.jl",
    "chars": 278,
    "preview": "import KernelAbstractions\ninclude(joinpath(dirname(pathof(KernelAbstractions)), \"..\", \"test\", \"testsuite.jl\"))\n\nskip_tes"
  },
  {
    "path": "test/level-zero.jl",
    "chars": 7031,
    "preview": "using oneAPI.oneL0\n\n# ensure that the driver we loaded is a versioned library, matching the Level Zero loader.\n# otherwi"
  },
  {
    "path": "test/onemkl.jl",
    "chars": 53741,
    "preview": "if Sys.iswindows()\n@warn \"Skipping unsupported oneKML tests\"\nelse\n\nusing oneAPI\n    using oneAPI.oneMKL: band, bandex, o"
  },
  {
    "path": "test/pointer.jl",
    "chars": 3468,
    "preview": "using oneAPI.oneL0\n\n# constructors\nvoidptr_a = ZePtr{Cvoid}(Int(0xDEADBEEF))\n@test reinterpret(Ptr{Cvoid}, voidptr_a) =="
  },
  {
    "path": "test/random.jl",
    "chars": 922,
    "preview": "using Random\n\n@testset \"rand\" begin\n\n# in-place\nfor (f,T) in ((rand!,Float16),\n              (rand!,Float32),\n          "
  },
  {
    "path": "test/runtests.jl",
    "chars": 13800,
    "preview": "using Distributed\nusing Dates\nimport REPL\nusing Printf: @sprintf\nusing Base.Filesystem: path_separator\n\n# parse some com"
  },
  {
    "path": "test/setup.jl",
    "chars": 4311,
    "preview": "using Distributed, Test, oneAPI\n\noneAPI.functional() || error(\"oneAPI.jl is not functional on this system\")\n\n# GPUArrays"
  },
  {
    "path": "test/sorting.jl",
    "chars": 496,
    "preview": "using Test\nusing oneAPI\n\n@testset \"sorting\" begin\n    data = oneArray([3, 1, 4, 1, 5])\n    sort!(data)\n    @test Array(d"
  },
  {
    "path": "test/sycl.jl",
    "chars": 527,
    "preview": "if Sys.iswindows()\n@warn \"Skipping unsupported SYCL tests\"\nelse\n\nusing oneAPI.oneL0, oneAPI.SYCL\n\n@test sycl_platform() "
  }
]

// ... and 2 more files (download for full content)

About this extraction

This page contains the full source code of the JuliaGPU/oneAPI.jl GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 134 files (1.9 MB), approximately 519.5k tokens, and a symbol index with 919 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!