Repository: SciRuby/nmatrix Branch: master Commit: beb266ed1ae5 Files: 285 Total size: 3.0 MB Directory structure: gitextract_ehxqtovs/ ├── .gitignore ├── .rspec ├── .travis.yml ├── CONTRIBUTING.md ├── Gemfile ├── History.txt ├── LICENSE.txt ├── Manifest.txt ├── README.rdoc ├── Rakefile ├── ext/ │ ├── nmatrix/ │ │ ├── binary_format.txt │ │ ├── data/ │ │ │ ├── complex.h │ │ │ ├── data.cpp │ │ │ ├── data.h │ │ │ ├── meta.h │ │ │ └── ruby_object.h │ │ ├── extconf.rb │ │ ├── math/ │ │ │ ├── asum.h │ │ │ ├── cblas_enums.h │ │ │ ├── cblas_templates_core.h │ │ │ ├── gemm.h │ │ │ ├── gemv.h │ │ │ ├── getrf.h │ │ │ ├── getrs.h │ │ │ ├── imax.h │ │ │ ├── laswp.h │ │ │ ├── long_dtype.h │ │ │ ├── magnitude.h │ │ │ ├── math.h │ │ │ ├── nrm2.h │ │ │ ├── rot.h │ │ │ ├── rotg.h │ │ │ ├── scal.h │ │ │ ├── trsm.h │ │ │ └── util.h │ │ ├── math.cpp │ │ ├── nm_memory.h │ │ ├── nmatrix.cpp │ │ ├── nmatrix.h │ │ ├── ruby_constants.cpp │ │ ├── ruby_constants.h │ │ ├── ruby_nmatrix.c │ │ ├── storage/ │ │ │ ├── common.cpp │ │ │ ├── common.h │ │ │ ├── dense/ │ │ │ │ ├── dense.cpp │ │ │ │ └── dense.h │ │ │ ├── list/ │ │ │ │ ├── list.cpp │ │ │ │ └── list.h │ │ │ ├── storage.cpp │ │ │ ├── storage.h │ │ │ └── yale/ │ │ │ ├── class.h │ │ │ ├── iterators/ │ │ │ │ ├── base.h │ │ │ │ ├── iterator.h │ │ │ │ ├── row.h │ │ │ │ ├── row_stored.h │ │ │ │ ├── row_stored_nd.h │ │ │ │ └── stored_diagonal.h │ │ │ ├── math/ │ │ │ │ └── transpose.h │ │ │ ├── yale.cpp │ │ │ └── yale.h │ │ ├── types.h │ │ └── util/ │ │ ├── io.cpp │ │ ├── io.h │ │ ├── sl_list.cpp │ │ ├── sl_list.h │ │ └── util.h │ ├── nmatrix_atlas/ │ │ ├── extconf.rb │ │ ├── math_atlas/ │ │ │ ├── cblas_templates_atlas.h │ │ │ ├── clapack_templates.h │ │ │ ├── geev.h │ │ │ ├── gesdd.h │ │ │ ├── gesvd.h │ │ │ └── inc.h │ │ ├── math_atlas.cpp │ │ └── nmatrix_atlas.cpp │ ├── nmatrix_fftw/ │ │ ├── extconf.rb │ │ └── nmatrix_fftw.cpp │ ├── nmatrix_java/ │ │ ├── nmatrix/ │ │ │ ├── math/ │ │ │ │ └── MathHelper.java │ │ │ ├── storage/ │ │ │ │ └── dense/ │ 
│ │ │ └── ArrayComparator.java │ │ │ └── util/ │ │ │ ├── ArrayGenerator.java │ │ │ ├── MatrixGenerator.java │ │ │ └── WrapperType.java │ │ └── test/ │ │ ├── AssertTests.java │ │ └── TestRunner.java │ └── nmatrix_lapacke/ │ ├── extconf.rb │ ├── lapacke/ │ │ ├── include/ │ │ │ ├── lapacke.h │ │ │ ├── lapacke_config.h │ │ │ ├── lapacke_mangling.h │ │ │ ├── lapacke_mangling_with_flags.h │ │ │ └── lapacke_utils.h │ │ ├── src/ │ │ │ ├── lapacke_cgeev.c │ │ │ ├── lapacke_cgeev_work.c │ │ │ ├── lapacke_cgeqrf.c │ │ │ ├── lapacke_cgeqrf_work.c │ │ │ ├── lapacke_cgesdd.c │ │ │ ├── lapacke_cgesdd_work.c │ │ │ ├── lapacke_cgesvd.c │ │ │ ├── lapacke_cgesvd_work.c │ │ │ ├── lapacke_cgetrf.c │ │ │ ├── lapacke_cgetrf_work.c │ │ │ ├── lapacke_cgetri.c │ │ │ ├── lapacke_cgetri_work.c │ │ │ ├── lapacke_cgetrs.c │ │ │ ├── lapacke_cgetrs_work.c │ │ │ ├── lapacke_cpotrf.c │ │ │ ├── lapacke_cpotrf_work.c │ │ │ ├── lapacke_cpotri.c │ │ │ ├── lapacke_cpotri_work.c │ │ │ ├── lapacke_cpotrs.c │ │ │ ├── lapacke_cpotrs_work.c │ │ │ ├── lapacke_cunmqr.c │ │ │ ├── lapacke_cunmqr_work.c │ │ │ ├── lapacke_dgeev.c │ │ │ ├── lapacke_dgeev_work.c │ │ │ ├── lapacke_dgeqrf.c │ │ │ ├── lapacke_dgeqrf_work.c │ │ │ ├── lapacke_dgesdd.c │ │ │ ├── lapacke_dgesdd_work.c │ │ │ ├── lapacke_dgesvd.c │ │ │ ├── lapacke_dgesvd_work.c │ │ │ ├── lapacke_dgetrf.c │ │ │ ├── lapacke_dgetrf_work.c │ │ │ ├── lapacke_dgetri.c │ │ │ ├── lapacke_dgetri_work.c │ │ │ ├── lapacke_dgetrs.c │ │ │ ├── lapacke_dgetrs_work.c │ │ │ ├── lapacke_dormqr.c │ │ │ ├── lapacke_dormqr_work.c │ │ │ ├── lapacke_dpotrf.c │ │ │ ├── lapacke_dpotrf_work.c │ │ │ ├── lapacke_dpotri.c │ │ │ ├── lapacke_dpotri_work.c │ │ │ ├── lapacke_dpotrs.c │ │ │ ├── lapacke_dpotrs_work.c │ │ │ ├── lapacke_sgeev.c │ │ │ ├── lapacke_sgeev_work.c │ │ │ ├── lapacke_sgeqrf.c │ │ │ ├── lapacke_sgeqrf_work.c │ │ │ ├── lapacke_sgesdd.c │ │ │ ├── lapacke_sgesdd_work.c │ │ │ ├── lapacke_sgesvd.c │ │ │ ├── lapacke_sgesvd_work.c │ │ │ ├── lapacke_sgetrf.c │ │ │ ├── 
lapacke_sgetrf_work.c │ │ │ ├── lapacke_sgetri.c │ │ │ ├── lapacke_sgetri_work.c │ │ │ ├── lapacke_sgetrs.c │ │ │ ├── lapacke_sgetrs_work.c │ │ │ ├── lapacke_sormqr.c │ │ │ ├── lapacke_sormqr_work.c │ │ │ ├── lapacke_spotrf.c │ │ │ ├── lapacke_spotrf_work.c │ │ │ ├── lapacke_spotri.c │ │ │ ├── lapacke_spotri_work.c │ │ │ ├── lapacke_spotrs.c │ │ │ ├── lapacke_spotrs_work.c │ │ │ ├── lapacke_zgeev.c │ │ │ ├── lapacke_zgeev_work.c │ │ │ ├── lapacke_zgeqrf.c │ │ │ ├── lapacke_zgeqrf_work.c │ │ │ ├── lapacke_zgesdd.c │ │ │ ├── lapacke_zgesdd_work.c │ │ │ ├── lapacke_zgesvd.c │ │ │ ├── lapacke_zgesvd_work.c │ │ │ ├── lapacke_zgetrf.c │ │ │ ├── lapacke_zgetrf_work.c │ │ │ ├── lapacke_zgetri.c │ │ │ ├── lapacke_zgetri_work.c │ │ │ ├── lapacke_zgetrs.c │ │ │ ├── lapacke_zgetrs_work.c │ │ │ ├── lapacke_zpotrf.c │ │ │ ├── lapacke_zpotrf_work.c │ │ │ ├── lapacke_zpotri.c │ │ │ ├── lapacke_zpotri_work.c │ │ │ ├── lapacke_zpotrs.c │ │ │ ├── lapacke_zpotrs_work.c │ │ │ ├── lapacke_zunmqr.c │ │ │ └── lapacke_zunmqr_work.c │ │ └── utils/ │ │ ├── lapacke_c_nancheck.c │ │ ├── lapacke_cge_nancheck.c │ │ ├── lapacke_cge_trans.c │ │ ├── lapacke_cpo_nancheck.c │ │ ├── lapacke_cpo_trans.c │ │ ├── lapacke_ctr_nancheck.c │ │ ├── lapacke_ctr_trans.c │ │ ├── lapacke_d_nancheck.c │ │ ├── lapacke_dge_nancheck.c │ │ ├── lapacke_dge_trans.c │ │ ├── lapacke_dpo_nancheck.c │ │ ├── lapacke_dpo_trans.c │ │ ├── lapacke_dtr_nancheck.c │ │ ├── lapacke_dtr_trans.c │ │ ├── lapacke_lsame.c │ │ ├── lapacke_s_nancheck.c │ │ ├── lapacke_sge_nancheck.c │ │ ├── lapacke_sge_trans.c │ │ ├── lapacke_spo_nancheck.c │ │ ├── lapacke_spo_trans.c │ │ ├── lapacke_str_nancheck.c │ │ ├── lapacke_str_trans.c │ │ ├── lapacke_xerbla.c │ │ ├── lapacke_z_nancheck.c │ │ ├── lapacke_zge_nancheck.c │ │ ├── lapacke_zge_trans.c │ │ ├── lapacke_zpo_nancheck.c │ │ ├── lapacke_zpo_trans.c │ │ ├── lapacke_ztr_nancheck.c │ │ └── lapacke_ztr_trans.c │ ├── lapacke.cpp │ ├── lapacke_nmatrix.h │ ├── make_lapacke_cpp.rb │ ├── math_lapacke/ 
│ │ ├── cblas_local.h │ │ ├── cblas_templates_lapacke.h │ │ └── lapacke_templates.h │ ├── math_lapacke.cpp │ └── nmatrix_lapacke.cpp ├── lib/ │ ├── nmatrix/ │ │ ├── atlas.rb │ │ ├── blas.rb │ │ ├── cruby/ │ │ │ └── math.rb │ │ ├── enumerate.rb │ │ ├── fftw.rb │ │ ├── homogeneous.rb │ │ ├── io/ │ │ │ ├── fortran_format.rb │ │ │ ├── harwell_boeing.rb │ │ │ ├── market.rb │ │ │ ├── mat5_reader.rb │ │ │ ├── mat_reader.rb │ │ │ └── point_cloud.rb │ │ ├── jruby/ │ │ │ ├── decomposition.rb │ │ │ ├── enumerable.rb │ │ │ ├── error.rb │ │ │ ├── math.rb │ │ │ ├── nmatrix_java.rb │ │ │ ├── operators.rb │ │ │ └── slice.rb │ │ ├── lapack_core.rb │ │ ├── lapack_ext_common.rb │ │ ├── lapack_plugin.rb │ │ ├── lapacke.rb │ │ ├── math.rb │ │ ├── mkmf.rb │ │ ├── monkeys.rb │ │ ├── nmatrix.rb │ │ ├── rspec.rb │ │ ├── shortcuts.rb │ │ ├── version.rb │ │ └── yale_functions.rb │ └── nmatrix.rb ├── nmatrix-atlas.gemspec ├── nmatrix-fftw.gemspec ├── nmatrix-lapacke.gemspec ├── nmatrix.gemspec ├── scripts/ │ ├── benchmarks/ │ │ └── nmatrix_creation.rb │ ├── switch_lapack_ubuntu.rb │ └── ttable_helper.rb ├── spec/ │ ├── 00_nmatrix_spec.rb │ ├── 01_enum_spec.rb │ ├── 02_slice_spec.rb │ ├── 03_nmatrix_monkeys_spec.rb │ ├── 2x2_dense_double.mat │ ├── 4x4_sparse.mat │ ├── 4x5_dense.mat │ ├── blas_spec.rb │ ├── elementwise_spec.rb │ ├── homogeneous_spec.rb │ ├── io/ │ │ ├── fortran_format_spec.rb │ │ ├── harwell_boeing_spec.rb │ │ └── test.rua │ ├── io_spec.rb │ ├── lapack_core_spec.rb │ ├── leakcheck.rb │ ├── math_spec.rb │ ├── nmatrix_yale_resize_test_associations.yaml │ ├── nmatrix_yale_spec.rb │ ├── plugins/ │ │ ├── atlas/ │ │ │ └── atlas_spec.rb │ │ ├── fftw/ │ │ │ └── fftw_spec.rb │ │ └── lapacke/ │ │ └── lapacke_spec.rb │ ├── rspec_monkeys.rb │ ├── rspec_spec.rb │ ├── shortcuts_spec.rb │ ├── slice_set_spec.rb │ ├── spec_helper.rb │ ├── stat_spec.rb │ └── test.pcd └── travis.sh ================================================ FILE CONTENTS ================================================ 
================================================ FILE: .gitignore ================================================ .idea Gemfile.lock ext/nmatrix/dense/daxpy.c ext/nmatrix/dense/dgeco.c ext/nmatrix/dense/dgefa.c ext/nmatrix/dense/dgemm.c ext/nmatrix/dense/dgemv.c ext/nmatrix/dense/dscal.c ext/nmatrix/dense/idamax.c ext/nmatrix/dense/467.c ext/nmatrix/dense/*.f ext/nmatrix/dense/transpose.txt ext/nmatrix/yale/aicm.tex tmp/ test.c spec/*.mtx *.so *.bundle *.bundle.dSYM *.log *.sw? *~ /tags *.gem html/ doc/ docs/ pkg/ .autotest ext/nmatrix_java/vendor/ ext/nmatrix_java/target/ ext/nmatrix_java/build/ ext/nmatrix_java/target/ *.class *.jar ================================================ FILE: .rspec ================================================ --color --format RSpec::Longrun::Formatter ================================================ FILE: .travis.yml ================================================ language: ruby sudo: required cache: bundler os: - linux - osx osx_image: xcode7.2 env: - USE_ATLAS=1 # This configuration installs ATLAS, builds and tests the nmatrix, nmatrix-atlas, and nmatrix-lapacke gems - USE_OPENBLAS=1 # Installs OpenBLAS and reference LAPACK, builds and tests nmatrix, nmatrix-lapacke - USE_REF=1 # Installs OpenBLAS and reference LAPACK, builds and tests nmatrix, nmatrix-lapacke - NO_EXTERNAL_LIB=1 # No external libraries installed, only nmatrix rvm: - 2.0.0-p648 - 2.1.8 - 2.2.4 - 2.3.0 - 2.4.2 - ruby-head # The latest stable and head versions built by clang - 2.3.0-clang - ruby-head-clang # JRuby versions --- experimental, pending merging Prasun's GSoC project. - jruby-9.0.5.0 # earliest supported version (uncomment when jruby-head is passing) - jruby-head # latest supported JRuby # Make sure to add exclude lines for new JRuby versions below. before_install: ./travis.sh before_install install: ./travis.sh install script: ./travis.sh script # Define extra configurations to add to the build matrix. 
# The idea here is that the USE_ATLAS=1 option should exercise all the ruby # code, so it is the only one we need to test with all versions of ruby. # For other configurations we only test with one version of ruby. matrix: exclude: - rvm: jruby-head env: USE_ATLAS=1 - rvm: jruby-head env: USE_OPENBLAS=1 - rvm: jruby-head env: USE_REF=1 - rvm: jruby-9.0.5.0 env: USE_ATLAS=1 - rvm: jruby-9.0.5.0 env: USE_OPENBLAS=1 - rvm: jruby-9.0.5.0 env: USE_REF=1 # NOTE: The following two ruby versions on OSX are currently unavailable - os: osx rvm: 2.0.0-p648 - os: osx rvm: 2.1.8 - os: osx rvm: 2.2.4 - os: osx rvm: 2.3.0 - os: osx rvm: ruby-head - os: osx rvm: 2.3.0-clang - os: osx rvm: ruby-head-clang # FIXME: The following configuration is unavailable because ATLAS should be built from source. # We need homebrew formula for ATLAS and its bottle. - os: osx env: USE_ATLAS=1 # FIXME: The following configuration takes too long time when installing homebrew/dupes/lapack. # We need the bottle of lapack formula. 
- os: osx env: USE_REF=1 include: # The latest stable and head versions of ruby built by clang on OSX - os: osx compiler: clang rvm: 2.2 env: - ruby_version=2.4.0-dev USE_OPENBLAS=1 - os: osx compiler: clang rvm: 2.2 env: - ruby_version=2.4.0-dev NO_EXTERNAL_LIB=1 - os: osx compiler: clang rvm: 2.2 env: - ruby_version=2.3.0 USE_OPENBLAS=1 - os: osx compiler: clang rvm: 2.2 env: - ruby_version=2.3.0 NO_EXTERNAL_LIB=1 # The latest version of Ruby 2.2.x built by clang on OSX - os: osx compiler: clang rvm: 2.2 env: - ruby_version=2.2.4 USE_OPENBLAS=1 # The latest version of Ruby 2.1.x built by clang on OSX - os: osx compiler: clang rvm: 2.2 env: - ruby_version=2.1.8 USE_OPENBLAS=1 allow_failures: # trunk - rvm: ruby-head - rvm: ruby-head-clang - os: osx compiler: clang rvm: 2.2 env: - ruby_version=2.4.0-dev USE_OPENBLAS=1 - os: osx compiler: clang rvm: 2.2 env: - ruby_version=2.4.0-dev NO_EXTERNAL_LIB=1 notifications: irc: "chat.freenode.net#sciruby" ================================================ FILE: CONTRIBUTING.md ================================================ NMatrix is part of SciRuby, a collaborative effort to bring scientific computation to Ruby. If you want to help, please do so! This guide covers ways in which you can contribute to the development of SciRuby and, more specifically, NMatrix. ## How to help There are various ways to help NMatrix: bug reports, coding and documentation. All of them are important. First, you can help implement new features or bug fixes. To do that, visit our [roadmap](https://github.com/SciRuby/nmatrix/wiki/Roadmap) or our [issue tracker][2]. If you find something that you want to work on, post it in the issue or on our [mailing list][1]. You need to send tests together with your code. No exceptions. You can ask for our opinion, but we won't accept patches without good spec coverage. We use RSpec for testing. 
If you aren't familiar with it, there's a good [guide to better specs with RSpec](http://betterspecs.org/) that shows a bit of the syntax and how to use it properly. However, the best resource is probably the specs that already exist -- so just read them. And don't forget to write documentation (we use rdoc). It's necessary to allow others to know what's available in the library. There's a section on it later in this guide. We only accept bug reports and pull requests in GitHub. You'll need to create a new (free) account if you don't have one already. To learn how to create a pull request, please see [this guide on collaborating](https://help.github.com/categories/63/articles). If you have a question about how to use NMatrix or SciRuby in general or a feature/change in mind, please ask the [sciruby-dev mailing list][1]. Thanks! ## Coding To start helping with the code, you need to have all the dependencies in place: - GCC 4.3+ - git - Ruby 1.9+ - `bundler` gem - ATLAS/LAPACKE/FFTW depending on the plugin you want to change. Now, you need to clone the git repository: ```bash $ git clone git://github.com/SciRuby/nmatrix.git $ cd nmatrix $ bundle install $ rake compile $ rake spec ``` This will install all dependencies, compile the extension and run the specs. For **JRuby** ```bash $ mkdir ext/nmatrix_java/vendor Download commons_math.3.6.1 jar and place it in ext/nmatrix_java/vendor directory $ mkdir -p ext/nmatrix_java/build/class $ mkdir ext/nmatrix_java/target $ rake jruby ``` If everything's fine until now, you can create a new branch to work on your feature: ```bash $ git branch new-feature $ git checkout new-feature ``` Before committing any code, please read our [Contributor Agreement](http://github.com/SciRuby/sciruby/wiki/Contributor-Agreement). ### Guidelines for interfacing with C/C++ NMatrix uses a lot of C/C++ to speed up execution of processes and give more control over data types, storage types, etc. 
Since we are interfacing between two very different languages, things can get out of hand pretty fast. Please go through this before you create any C accessors: * Perform all pre-computation error checking in Ruby. * Curate any extra data (cloned objects, trivial computations, etc.) in Ruby. * Do _NOT_ resolve VALUE into constituent elements unless they reach the function where the elements are needed or it is absolutely necessary. Passing around a VALUE in the C/C++ core is much more convenient than passing around `void*` pointers which point to an array of matrix elements. Basically, follow a practice of 'once you enter C, never look back!'. If you have something more in mind, discuss it in the issue tracker or on [this](https://groups.google.com/forum/#!topic/sciruby-dev/OJxhrGG309o) thread. ## C/C++ style guide This section is a work in progress. * Use camel_case notation for arguments. No upper case. * Write a brief description of the arguments that your function receives in the comments directly above the function. * Explicitly state in the comments any anomalies that your function might have. For example, that it does not work with a certain storage or data type. ## Documentation There are two ways in which NMatrix is being documented: guides and comments, which are converted with RDoc into the documentation seen in [sciruby.com](http://sciruby.com). If you want to write a guide on how to use NMatrix to solve some problem or simply show how to use one of its features, write it as a wiki page and send an e-mail on the [mailing list][1]. We're working to improve this process. If you aren't familiar with RDoc syntax, [this is the official documentation](http://docs.seattlerb.org/rdoc/RDoc/Markup.html). ## Making new nmatrix extensions From version 0.2, NMatrix supports extensions, all of which can be hosted from the main nmatrix repo. 
Refer to [this blog post ](http://wlevine.github.io/2015/06/15/releasing-multiple-gems-with-c-extensions-from-the-same-repository.html) to see how to do that in case you want to write your own extension for nmatrix. ## Conclusion This guide was heavily based on the [Contributing to Ruby on Rails guide](http://edgeguides.rubyonrails.org/contributing_to_ruby_on_rails.html). [1]: https://groups.google.com/forum/?fromgroups#!forum/sciruby-dev [2]: https://github.com/sciruby/nmatrix/issues?sort=created&state=open ================================================ FILE: Gemfile ================================================ source 'https://rubygems.org' #main gemspec gemspec :name => 'nmatrix' #plugin gemspecs Dir['nmatrix-*.gemspec'].each do |gemspec_file| plugin_name = gemspec_file.match(/(nmatrix-.*)\.gemspec/)[1] gemspec(:name => plugin_name, :development_group => :plugin) end ================================================ FILE: History.txt ================================================ === 0.0.1 / 2012-04-10 * 1 major enhancement * Initial alpha release === 0.0.2 / 2012-09-21 * 15 major enhancements * Second alpha release * Rewrote NMatrix in C++0x and C++11 using templates, namespaces; removed Ruby generators and CAST parser * Added preliminary C API * Slicing and referencing support for dense and list matrices (by @flipback) * BLAS level-3 xTRSM algorithm added for rationals and BLAS types * LAPACK support added, including partially working xGETRF subroutine * Element-wise comparisons now return byte-matrices * Element-wise operations on list matrices may alter the default value of the return matrix * Element-wise division behaves like Ruby division * Improved MATLAB .MAT v5 file reading * clang support * `==` operator now used for matrix equality, `=~` and `!~` for element-wise comparisons * Dense `each` returns an Enumerator when called without a block * Sped up list storage item deletion, fixed bugs * List matrix-to-hash conversion with `to_h` * Note: 
Element-wise list operations current disabled === 0.0.3 / 2013-01-18 * 8 major enhancements * Matrix-scalar operations (dense, list) * Shortcuts for matrix creation (by @agarie) * Access to most ATLAS-implemented LAPACK functions for those with ATLAS' CLAPACK interface: xGETRF, xGETRI, xGETRS, xGESV, xPOTRF, xPOTRI, xPOTRS, xPOSV, xLASWP, xSCAL, xLAUUM * Access to additional ATLAS-implemented BLAS functions: xTRMM, xSYRK, xHERK, xROT, xROTG * Non-ATLAS versions of CLAPACK functions: xLASWP, xSCAL, xLAUUM, xROT * Matrix inversion (LU and Cholesky; requires CLAPACK) * LU factoring with and without CLAPACK * Native matrix I/O for dense (supporting upper, lower, hermitian, skew, symmetric, and general) and yale (general only); excludes Ruby objects currently * 2 bug fixes: * Yale-to-list casting * Now requires packable-1.3.5 or higher, fixing a problem with MATLAB .mat v5 file I/O (specific to doubles) === 0.0.4 / 2013-05-17 * 3 major enhancements * Added a more user-friendly interface for cblas_rot in the form of NMatrix::BLAS::rot * Added to_hash for Yale matrices * Improved source code documentation (by @agarie) * 4 minor enhancements * Spec clean-up (by @masaomi) * Made it possible to request a different itype internally for Yale matrices * Improved space usage of Yale slice-by-copying, which was requesting more space than needed * Improved compile-time Mac OS X and Ubuntu library searching * 8 bug fixes: * NMatrix::BLAS::gemv segfaults * Fixed Yale matrix slice-by-copy write error where default itypes (which are based on shape) differ, and a separate problem where incorrect IJA and A entries were written. 
* NVector-scalar operations and NVector-NVector element-wise options now return an NVector instead of an NMatrix * Addressed problems with segmentation faults during iteration (by @cjfuller) * Addressed Ubuntu/Debian installation problems (incompatibility with apt-supplied atlas) * Fixed transpose behavior following slice-by-reference (by @cjfuller) * Fixed gem install command in Rakefile (by @jpmckinney) * Fixed Spanish language compile issue (by @imcsk8 and @agarie) === 0.0.5 / 2013-07-09 * 4 major enhancements * NVector orientation is now controlled by its shape, not by the @orientation property * NVector default orientation is now a row vector rather than a column, as this is more efficient for Yale storage * NVector objects may now be created with dtypes other than dense * Exposure of additional ATLAS-implemented BLAS functions, including native rational and Ruby object support, for xANUM (sum of the absolute values of a vector) and xNRM2 (2-norm of a vector); and Ruby helper functions BLAS::anum and BLAS::nrm2 which should do more careful argument sanity checking * 9 minor enhancements * Added #yale_vector_insert to NMatrix::YaleFunctions, to speed up insertion of multiple items into a Yale matrix * Added #yale_nd_row, #yale_nd_row_as_hash, #yale_nd_row_as_array, #yale_nd_row_as_set, #yale_nd_row_as_sorted_set, #yale_row, #yale_row_as_hash, #yale_row_as_array, #yale_row_as_set, #yale_row_as_sorted_set, #yale_nd_row_size to NMatrix::YaleFunctions in order to speed up getting multiple items from some row of a Yale matrix * Improved #yale_ija, #yale_a, #yale_d by allowing an optional index argument, which returns a single array value instead of copying and returning the entire array * Improved sorting algorithm for Yale matrix multiplication; instead of selection sort, now uses quicksort; and subs in insertion sort for small partitions * Slicing a single row or column now returns an NVector instead of an NMatrix (does not yet work for n-dimensional matrices) * 
Improved function documentation for NVector and NMatrix * Added #min, #max, #to_a, #shuffle, #shuffle!, #absolute_sum, #norm2 functions to NVector * Aliased missing dimension of NVector#each_stored_with_indices to #each_stored_with_index, which only yields a value and i or j (not both i and j) depending on the vector orientation * Added #each_row, #each_column to NMatrix * 5 bug fixes: * Dense iterators now return self (an NMatrix) in order to be consistent with Ruby Array behavior (by @cjfuller) * Fixed Yale resize problem (by @v0dro) * Fixed Yale nx1 times 1xn multiplication problem * Fixed Yale sorting-following-multiplication problem * NMatrix::read() now raises an exception when asked to read a file that does not exist === 0.0.6 / 2013-08-09 * 8 major enhancements: * Refactored iteration, so that each storage type now has each of: #each, #each_with_indices, #each_stored_with_indices * Added element-wise power function (**) for dense matrices (by @agarie) * Dramatically improved matrix element-wise and scalar functions so C++ templates are no longer necessary; element-wise operations may now be written in protected Ruby methods that look like NMatrix#__list_elementwise_op__ and NMatrix#__list_scalar_op__ * Element-wise and scalar operations that might return a true or false now return Ruby matrices * Yale element-wise and scalar operations have been added * Yale is now allowed to have a non-zero default (specifically to permit true-false matrices and nil sparse bases) * Dramatically improved NMatrix#cast to allow for hashed options including a :default for list and yale casts * Dramatically improved speed of compilation * 14 minor enhancements: * Improved documentation for exposed BLAS and LAPACK functions * Allowed for use of BLAS::rot without cloning x and y (in-place plane rotation); removed unnecessary test of unfriendly version * Added more user-friendly cblas_xrotg implementation: BLAS::rotg * Moved NMatrix::YaleFunctions::yale_vector_insert to 
NMatrix#__yale_vector_set__, which is more consistent with behavior * Changed notations for protected stype-specific functions, which now look like __stype_method_name__, e.g., __yale_vector_set__ * Added NMatrix#__list_default_value__ protected function to get the initial (sparse) value for list matrices * Changed behavior and names of NMatrix::YaleFunctions methods which get column indices and cell contents by row, so that they now expect the :keys option (like Hash#keys) instead of :array, which doesn't make sense; name changes are as follows: yale_row_as_sorted_set -> yale_ja_d_keys_sorted_set_at yale_row_as_set -> yale_ja_d_keys_set_at yale_row_as_array -> yale_ja_d_keys_at yale_nd_row_as_sorted_set -> yale_ja_sorted_set_at yale_nd_row_as_set -> yale_ja_set_at yale_nd_row_as_array -> yale_ja_at Aliases are included but will be removed without notice. * Added NVector#sorted_indices and #binned_sorted_indices for use when running k-nearest neighbor searches on a distance matrix * Added NVector::logspace shortcut function (analogous to NVector::linspace) * Cleaned up code by removing monkey patches that we stopped using circa v0.0.2 (Array#min, Array#max, String#constantize, String#camelize, String#underscore) * Re-enabled element-wise mod (%) method * Added NMatrix::guess_dtype class method, which allows you to figure out what dtype (roughly) should be used for any given Ruby value (e.g., 3) * String and nil objects in NMatrix cells are no longer outlawed (but are not supported), provided they are of the :object dtype * NMatrix#diag shortcut for specifying sparse matrix with a user- specified diagonal array (by @ryanmt) * 3 bug fixes: * Corrected BLAS::rot bounds checking on optional n argument * Removed BLAS::rotg and BLAS::nrm2 code for rational numbers, as both involve a square root * Repaired list matrix element-wise functions === 0.0.7 / 2013-08-22 * 6 major enhancements: * Eliminated NVector in favor of NMatrix objects with #effective_dim smaller than 
#dim; added NVector-like functionality to NMatrix, sometimes with #respond_to? dependent upon matrix dimensions; allowed for NVector.new to continue to function as before, but now returns an NMatrix instead * Began major re-factoring of headers for math.cpp * Added two singular value decomposition functions for dense NMatrix objects, #gesvd and #gesdd, for floating point and complex dtypes * Added additional slicing shorthand, which uses hashes (e.g., n[0=>3,2] for n[0..3,2]), which may eventually allow users to use n[0:3,2] notation instead (needs Ruby core patch) * #[] and #[]= calls no longer need index on those matrix shape elements which are equal to 1 (e.g., vectors) * Yale slicing-by-reference has been added * 18 minor enhancements: * Added user-friendly NMatrix::LAPACK::laswp method * Added NMatrix#permute_columns! and NMatrix#permute_columns * Added NMatrix#abs to compute element-wise absolute values, and #abs_dtype to determine the dtype returned by a call to #abs on a given matrix (needed for RSpec) * Added NMatrix#size to compute the total number of cells in an NMatrix object (solely accounting for the shape, not sparsity) * Added RSpec monkey patches for #be_within to work with NMatrix objects; usable by requiring "nmatrix/rspec" * Added experimental NMatrix::LAPACK::lapack_gesvd function (which does NOT depend upon CLAPACK) (by @ryanmt and @mohawkjohn) * Added experimental non-LAPACK-dependent function NMatrix::LAPACK::lapack_gesdd * Added NMatrix#supershape method for getting the shape of a slice's parent or other ancestor, which may be useful for calling ATLAS and LAPACK functions on slices * Aliased NMatrix[] to function like N[] shortcut for matrix creation (by @agarie) * Added #layer for matrices with dimension greater than two (corresponds to #row and #column) * Added #rank and #each_rank generics for #row/#column/#layer and #each_row/#each_column/#each_layer respectively (#each_rank replaces #each_along_dim) * Replaced #reduce_along_dim with 
#inject_rank * NMatrix#to_a now works for up to two dimensional matrices; and returns a flattened array for single-row matrices * NMatrix#to_flat_a now returns a flattened array of all entries * Re-organized NMatrix Ruby sources into multiple files: math.rb for instance methods which are mathematical in nature or are essentially ATLAS/LAPACK/BLAS calls, enumerate.rb for methods involving enumeration; and existing shortcuts.rb for convenience functions and shortcut constructors, and nmatrix.rb for core functionality (#inspect, #to_h, #to_a, #to_s, #pretty_print, and so on) * Improved #pretty_print, which now also prints layers (the third dimension in a 3D matrix) * Re-factored some of dense slicing to remove some redundant code * Added shortcut functions #list?, #dense?, #yale? for quickly testing matrix storage type * 5 bug fixes: * Fixed compilation problem involving and STL headers * Fixed NMatrix#inverse problem with non-square matrices * Fixed invalid read problem detected by Valgrind for Yale element-wise subtraction in spec * Fixed conversion from Ruby object to Complex and Rational * Fixed memory leak in slicing === 0.0.8 / 2013-08-23 * 2 bug fixes: * Fixed Ubuntu compilation bug caused by math header file refactoring * Fixed pry version error which manifests on some systems but not others === 0.0.9 / 2013-09-18 * 5 major enhancements: * Re-factored NMatrix constructor * Improved usability of NMatrix shortcut constructor options (e.g., #zeros, #ones, #random, etc.) 
using new NMatrix constructor * Left-assignment of slices for all matrix storage types (uses a dense cast, or accepts an array or single value) * Re-factored Yale into a more object-oriented and less confusing set of header files * Enabled Travis CI (by @cjfuller) * 4 minor enhancements: * Renamed some specs in order to change their test order, so that critical tests fail first (particularly in the case of segmentation faults) * Default dtype is now :object when no initial values are specified * Deprecated NVector#initialize and a number of other unnecessary NVector functionality * Made Ubuntu compilation significantly easier (by @cjfuller) * 2 bug fixes: * nil values in matrices are now pretty printed as "nil" * Casting from dense to Yale now properly accepts the default value option === 0.1.0.rc1 / 2013-12-28 * 4 major enhancements: * Improved garbage collection strategy for partial object creation (i.e., when VALUEs are allocated but not registered right away), which in addition to fixing numerous bugs should prevent some new bugs from arising in the future (by @cjfuller) * Implemented list storage transpose * Implemented generic n-dimensional transpose * Implemented == comparison between differing matrix stypes * 9 minor enhancements: * User-friendly #gesvd and #gesdd updates (by @ryanmt) * Added experimental #yale_row_key_intersection function for expert recommendation problems * Added additional *indgen shortcuts and changed behavior for some; now, #cindgen for :complex64, #zindgen for :complex128, #findgen for :float32, #dindgen for :float64, #rindgen for :rational128, and #rbindgen for Ruby objects (which contain integers); also, removed code repetition * Changed #stddev to use elementwise #sqrt instead of a manual map block (by @cjfuller) * Added alias from MATLAB `load_mat` method to `load` for consistency with the MatrixMarket loader * Improved organization by moving list and yale code into storage/ subdirectories * Added NMatrix#potrf! 
and NMatrix#getrf, which are instance methods for calling CLAPACK functions (NMatrix#getrf! already existed) * Added GCC installation instructions for Mac OS X Mavericks, and updated the old installation instructions for Mac OS X (both found in scripts/) * Switched NMatrix::VERSION to work more like Rails::VERSION, with support for MAJOR, MINOR, TINY, and PRE * Added #concat, #hconcat, #vconcat, and #dconcat for joining matrices together * 16 bug fixes: * Spec revisions for lapack_gesdd and lapack_gesvd (by @ryanmt) * Fixed two double-free problems (by @cjfuller and @mohawkjohn) * Fixed contiguous array marking fencepost error * Fixed C/C++ API compatibility problem preventing rb/gsl linking * Resolved a number of compiler warnings, including one return-type problem that would likely have become a garbage collection error (if it wasn't already) * Fixed -O3 optimization problems * Restored NMatrix#asum, #nrm2, #binned_sorted_indices, #sorted_indices which were inadvertantly removed by NVector deprecation; have not tested * Experimental #yale_nd_row and functions which call it now checks range of argument to prevent segfault * Fixed :* shortcut for a full list dimension (by @cjfuller) * Fixed list construction problem which occurred when an initial value array was provided (by @cjfuller) * Fixed #inject issue with list and yale matrices of two dimensions (by @cjfuller) * Fixed several garbage collection problems (also listed under enhancements) (by @cjfuller) * Updated object cleaning target in extconf.rb * Fixed possible compilation problem on Mavericks with Xcode 5.02 * Fixed errors involving undefined symbols, unresolved symbols, and lazy symbol binding * Improved LAPACK and BLAS header selection for Ubuntu/Debian systems with ATLAS (by @mvz) === 0.1.0.rc2 / 2014-03-12 * No major enhancements. 
* 14 minor enhancements: * Implemented negative-index slicing (by @rajatkapoor) * Added reader for Point Cloud Library's PCD format * Added Ruby 2.1 support (including Travis CI testing) * Implemented LAPACK-independent exact inverse calculation for dense matrices of size 2x2 and 3x3, as well as * Added NMatrix::has_clapack? method to determine whether CLAPACK support has been compiled in * Improved conformance of specs to RSpec best practices (by @duggiefresh) * Travis CI now updates the IRC channel when a check passes (by @agarie) * Added NMatrix#data_pointer, which returns the memory address of the stored elements in a matrix (generally for use with FFI and other libraries that need pointers) * Made NMatrix#clone_structure a public method (was protected) * Added :scale option for NMatrix::random to handle non-floating point forms * Added complex support to NMatrix::random * Aliased NMatrix::random to NMatrix::rand * Added NMatrix#reshape! for in-place reshape of dense matrices (by @rajatkapoor) * Implemented unary negation of matrices * 6 bug fixes: * Fixed dot product operation on 1-dimensional matrices (by @rve and @cjfuller) * Fixed segfault on 1-dimensional matrix transpose (by @cjfuller) * Fixed compile error with Ruby 2.1 (by @diminish7) * Fixed regression which wasn't causing any problems but was counter to design: stride was declared prior to data storage for dense matrix storage * Fixed Rakefile problem which was causing specs to run twice in a row with each call to rake spec * NMatrix::random now raises an exception when rational matrices are requested === 0.1.0.rc3 / 2014-03-27 * No major enhancements. * 2 minor enhancements: * Exposed NMatrix::LAPACK::geev for LAPACK's xGEEV * Added out-of-place complex conjugate for dense and yale storage (by @rve) * 1 bug fixes: * Fixed critical bug with transposing a matrix reference slice (by @rajatkapoor) === 0.1.0.rc4 / 2014-07-24 * No major enhancements. 
* 1 minor enhancement: * NMatrix#floor and #ceil implemented (by @v0dro) * 2 bug fixes: * Disallowed out-of-bounds rank calls (by @andrewcsmith) * Fixed rspec 3.0 conflict with rspec-longrun 1.0.1 === 0.1.0.rc5 / 2014-08-01 * No major enhancements. * 1 minor enhancements: * Added optional extension for generating homogeneous transformation matrices for rotations and translations in three dimensions * 3 bug fixes: * Fixed rake install (by @duggiefresh) * Fixed API problems which prevented NMatrix from working with the SciRuby rb-gsl fork * Fixed Yale #inject behavior (by @yoongkang) === 0.1.0 / 2014-12-11 * 3 major enhancements: * Updated to BSD 3-clause license * Removed Ruby 1.9.2 support; now requires Ruby 1.9.3 or higher (by @v0dro) * Added Gauss-Jordan elimination for calculation of matrix inverses (by @v0dro) * 6 minor enhancements: * Added trace method for square matrices * Improved Array#to_nm monkey patch so matrices and arrays can be interconverted easily, without need for a shape argument (by @andrewcsmith) * Added Harwell-Boeing and Fortran matrix format parsers (by @v0dro) * Removed soon-to-be-deprecated autoloads and replaced with a more robust method (by @duggiefresh) * Updated rake install task to use Bundler's GemHelper install task (by @duggiefresh) * Moved packable requirement from Gemfile to gemspec (by @andrewcsmith) * 3 bug fixes: * Corrected Ubuntu clapack functionality checking, which should fix most functions which rely on the ATLAS version of clapack * Corrected NMatrix::gesdd workspace request size (by @yoongkang) * Fixed definition of NMatrix#asum for one-by-one matrices (by @andrewcsmith) === 0.2.0 / 2015-08-24 * 2 major enhancements: * External libraries are now linked via optional plugins, removing ATLAS dependencies (by @wlevine) * Made it possible to use NMatrix together with NArray (by @blackwinter) * 9 minor enhancements: * Removed rational types (by @wlevine) * Added block-diagonal matrix generation method, NMatrix.block_diagonal 
(by @agisga) * Added Kronecker product method #kron_prod (by @agisga) * Made #permute_columns usage more intuitive (@agisga) * Added #pow method to raise a matrix to an integer power (by @v0dro) * Added #meshgrid method (by @dilcom) * Added #hessenberg method, for reducing matrices to upper Hessenberg form (by @v0dro) * Added calculation of correlation matrix with #corr and covariance using #cov (by @v0dro) * Added method for returning matrix diagonal, #diagonal (by @v0dro) * 11 bug fixes: * Fixed #== operator problems (by @wlevine) * Fixed BLAS.gemv (by @wlevine) * Fixed #asum for single element complex matrices (by @wlevine) * Fixed determinant calculation (by @wlevine) * Fixed division by zero (by @spacewander) * Fixed NMatrix#respond_to? so it would accept two arguments properly (by @ktns) * Fixed NMatrix#hermitian? (by @agisga) * Fixed #gesdd, #gesvd (by @wlevine) * Fixed #symmetric? (by @ktns) * Made rdoc a development dependency to resolve dependency conflicts (by @matt-liu) * Fixed bug where Array#to_nm would alter the array (by @andrewcsmith) === 0.2.1 / 2016-01-18 * 3 major enhancements: * New plugin nmatrix-fftw for wrapping over FFTW (by @v0dro) * Change Ruby Array C pointer references to be compatible with Ruby 2.3.0 (by @mrkn) * NMatrix can now be compiled with clang (by @mrkn) * 4 minor enhancements: * Improved Travis configs to test on Linux and OSX with and without plugins (by @mrkn) * Added non-abbreviated versions to the options of NMatrix#solve; added more docs (by @agisga) * Added several specialized algorithms to NMatrix#solve for more efficient solving of linear systems with upper or lower triangular matrices (by @agisga) * Remove redundant C implementation of NMatrix#complex_conjugate (by @yoongkang) * 4 bug fixes: * Fixed memory leak in math.cpp (inverse()) (by @lokeshh) * Check if optional permute parameter in NMatrix#transpose is an Array to prevent unexpected disappearing-parameter behavior (by @firemind) * Moved rubyobj_from_cval 
function into `nm` namespace from C-linkage to fix a C compile time error (by @mrkn) * Fixed undefined variable 'idefaults' in lapacke extconf (by @agisga) === 0.2.2 / 2016-07-22 * No major enhancements. * 15 minor enhancements: * Added Hilbert and inverse Hilbert matrix functions #hilbert and #inv_hilbert (by @arafatk) * Added NMatrix.linspace constructor for generating a vector with linearly spaced elements (by @gau27) * Added NMatrix.logspace constructor for generating a vector with logarithmically spaced elements (by @gau27) * Improved Travis configs (by @v0dro) * Added C API documentation and included ruby_constants.h in C API include files (by @v0dro) * Added #magic function to create magic square matrices (by @shahsaurabh0605) * Added NMatrix#last (by @gau27) * Added QR factorization by exposing LAPACK functions GEQRF, ORMQR, UNMQR (by @gau27) * Made templates a little smarter for those functions which require a separate return dtype by adding the MagnitudeDType template typename; and added a magnitude function to replace std::abs and abs to make complex and real templates more generic (by @mohawkjohn) * Added #adjugate and #adjugate! functions (by @sujithvm) * Added #scale and #scale! methods by exposing BLAS SCAL (by @lds56) * Re-factored type comparisons to use RB_TYPE_P instead of TYPE(obj) (by @mrkn) * Updated license to BSD 3-clause (by @gau27) * Cleaned up gem installation settings and dependencies (by @mrkn) * DRYed up extconf script (by @mrkn) * 15 bug fixes: * Fixed offsets and changed limits in TRSM to follow the Fortran implementation (by @lokeshh), and adjusted triangular #solve accordingly (by @agisga) * Fixed NRM2 (CBLAS 2-norm) for complex types (by @lokeshh) * Fixed NRM2 divide-by-zero bug (by @arafatk) * Fixed #reshape! 
to work when changing dimensionality (by @wlevine) * Fixed ambiguous references by making proper use of namespace qualifiers for dtypes, allowing compilation in Windows using the mingw64 toolchain available through msys2 (by @preetpalS) * Replaced all uses of u_int8_t with uint8_t and added static assertions to prevent ambiguous use of u_intX_t types (by @preetpalS) * Added workaround in extconf script for Windows use of a different name for the null device (by @preetpalS) * Updated deprecated RSpec code and other miscellaneous cleanups (by @yui-knk) * Removed incomplete support of Hash as an argument for NMatrix#[] (by @yui-knk) * Fixed typo in slicing exception error message (by @mohawkjohn) * Fixed #concat implementation for case of differing sizes along concatenation dimension (by @alno) * Ensured dtype is preserved by #repeat (by @alno) * Fixed #det_exact for :object dtype (by @isuruf) * Stopped using deprecated register storage class specifier (by @mrkn) * Fixed clang/clang++ compiler selection by forcing use of clang++ when clang is used (by @mrkn) === 0.2.3 / 2016-07-25 * No major enhancements. * No minor enhancements. * 1 bug fix: * Fixed gem installation problem caused by mkmf abstraction (by @mrkn) === 0.2.4 / 2017-12-14 * No major enhancements. * 2 minor enhancements: * Eliminated code reuse in math.rb between JRuby and MRI versions of library (by @prasunanand) * Slightly simplified #positive_definite? (by @prasunanand) * 2 bug fixes: * Fixed compilation problem on Mac OS X High Sierra (by @mohawkjohn) * Fixed failing #block_diagonal spec (due to missing Array#sum) (by @mohawkjohn) ================================================ FILE: LICENSE.txt ================================================ This version of NMatrix is licensed under the BSD 3-clause license. * http://sciruby.com * http://github.com/sciruby/sciruby/wiki/License You *must* read the Contributor Agreement before contributing code to the SciRuby Project. 
This is available online: * http://github.com/sciruby/sciruby/wiki/Contributor-Agreement ----- Copyright (c) 2010 - 2015, John Woods and the Ruby Science Foundation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
================================================ FILE: Manifest.txt ================================================ History.txt Manifest.txt README.rdoc LICENSE.txt CONTRIBUTING.md Rakefile Gemfile nmatrix.gemspec nmatrix-atlas.gemspec nmatrix-lapacke.gemspec nmatrix-fftw.gemspec travis.sh .travis.yml lib/nmatrix.rb lib/nmatrix/atlas.rb lib/nmatrix/blas.rb lib/nmatrix/enumerate.rb lib/nmatrix/homogeneous.rb lib/nmatrix/math.rb lib/nmatrix/mkmf.rb lib/nmatrix/monkeys.rb lib/nmatrix/nmatrix.rb lib/nmatrix/shortcuts.rb lib/nmatrix/version.rb lib/nmatrix/yale_functions.rb lib/nmatrix/fftw.rb lib/nmatrix/lapack_core.rb lib/nmatrix/lapack_ext_common.rb lib/nmatrix/lapack_plugin.rb lib/nmatrix/lapacke.rb lib/nmatrix/rspec.rb lib/nmatrix/io/market.rb lib/nmatrix/io/mat5_reader.rb lib/nmatrix/io/mat_reader.rb lib/nmatrix/io/point_cloud.rb lib/nmatrix/io/fortran_format.rb lib/nmatrix/io/harwell_boeing.rb lib/nmatrix/cruby/math.rb lib/nmatrix/jruby/decomposition.rb lib/nmatrix/jruby/enumerable.rb lib/nmatrix/jruby/error.rb lib/nmatrix/jruby/math.rb lib/nmatrix/jruby/nmatrix_java.rb lib/nmatrix/jruby/operators.rb lib/nmatrix/jruby/slice.rb ext/nmatrix/math/cblas_enums.h ext/nmatrix/math/cblas_templates_core.h ext/nmatrix/math/util.h ext/nmatrix/math/magnitude.h ext/nmatrix_atlas/extconf.rb ext/nmatrix_atlas/math_atlas.cpp ext/nmatrix_atlas/math_atlas/cblas_templates_atlas.h ext/nmatrix_atlas/math_atlas/clapack_templates.h ext/nmatrix_atlas/math_atlas/geev.h ext/nmatrix_atlas/math_atlas/gesdd.h ext/nmatrix_atlas/math_atlas/gesvd.h ext/nmatrix_atlas/math_atlas/inc.h ext/nmatrix_atlas/nmatrix_atlas.cpp ext/nmatrix_java/nmatrix/data/Complex.java ext/nmatrix_java/nmatrix/math/MathHelper.java ext/nmatrix_java/nmatrix/storage/dense/ArrayComparator.java ext/nmatrix_java/nmatrix/util/ArrayGenerator.java ext/nmatrix_java/nmatrix/util/MatrixGenerator.java ext/nmatrix_java/nmatrix/util/WrapperType.java ext/nmatrix_java/test/AssertTests.java ext/nmatrix_java/test/TestRunner.java 
ext/nmatrix_lapacke/extconf.rb ext/nmatrix_lapacke/lapacke.cpp ext/nmatrix_lapacke/lapacke/include/lapacke.h ext/nmatrix_lapacke/lapacke/include/lapacke_config.h ext/nmatrix_lapacke/lapacke/include/lapacke_mangling.h ext/nmatrix_lapacke/lapacke/include/lapacke_mangling_with_flags.h ext/nmatrix_lapacke/lapacke/include/lapacke_utils.h ext/nmatrix_lapacke/lapacke/src/lapacke_cgeev.c ext/nmatrix_lapacke/lapacke/src/lapacke_cgeev_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_cgesdd.c ext/nmatrix_lapacke/lapacke/src/lapacke_cgesdd_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_cgesvd.c ext/nmatrix_lapacke/lapacke/src/lapacke_cgesvd_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_cgetrf.c ext/nmatrix_lapacke/lapacke/src/lapacke_cgetrf_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_cgetri.c ext/nmatrix_lapacke/lapacke/src/lapacke_cgetri_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_cgetrs.c ext/nmatrix_lapacke/lapacke/src/lapacke_cgetrs_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_cpotrf.c ext/nmatrix_lapacke/lapacke/src/lapacke_cpotrf_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_cpotri.c ext/nmatrix_lapacke/lapacke/src/lapacke_cpotri_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_cpotrs.c ext/nmatrix_lapacke/lapacke/src/lapacke_cpotrs_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_dgeev.c ext/nmatrix_lapacke/lapacke/src/lapacke_dgeev_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_dgesdd.c ext/nmatrix_lapacke/lapacke/src/lapacke_dgesdd_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_dgesvd.c ext/nmatrix_lapacke/lapacke/src/lapacke_dgesvd_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_dgetrf.c ext/nmatrix_lapacke/lapacke/src/lapacke_dgetrf_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_dgetri.c ext/nmatrix_lapacke/lapacke/src/lapacke_dgetri_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_dgetrs.c ext/nmatrix_lapacke/lapacke/src/lapacke_dgetrs_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_dpotrf.c ext/nmatrix_lapacke/lapacke/src/lapacke_dpotrf_work.c 
ext/nmatrix_lapacke/lapacke/src/lapacke_dpotri.c ext/nmatrix_lapacke/lapacke/src/lapacke_dpotri_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_dpotrs.c ext/nmatrix_lapacke/lapacke/src/lapacke_dpotrs_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_sgeev.c ext/nmatrix_lapacke/lapacke/src/lapacke_sgeev_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_sgesdd.c ext/nmatrix_lapacke/lapacke/src/lapacke_sgesdd_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_sgesvd.c ext/nmatrix_lapacke/lapacke/src/lapacke_sgesvd_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_sgetrf.c ext/nmatrix_lapacke/lapacke/src/lapacke_sgetrf_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_sgetri.c ext/nmatrix_lapacke/lapacke/src/lapacke_sgetri_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_sgetrs.c ext/nmatrix_lapacke/lapacke/src/lapacke_sgetrs_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_spotrf.c ext/nmatrix_lapacke/lapacke/src/lapacke_spotrf_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_spotri.c ext/nmatrix_lapacke/lapacke/src/lapacke_spotri_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_spotrs.c ext/nmatrix_lapacke/lapacke/src/lapacke_spotrs_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_zgeev.c ext/nmatrix_lapacke/lapacke/src/lapacke_zgeev_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_zgesdd.c ext/nmatrix_lapacke/lapacke/src/lapacke_zgesdd_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_zgesvd.c ext/nmatrix_lapacke/lapacke/src/lapacke_zgesvd_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_zgetrf.c ext/nmatrix_lapacke/lapacke/src/lapacke_zgetrf_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_zgetri.c ext/nmatrix_lapacke/lapacke/src/lapacke_zgetri_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_zgetrs.c ext/nmatrix_lapacke/lapacke/src/lapacke_zgetrs_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_zpotrf.c ext/nmatrix_lapacke/lapacke/src/lapacke_zpotrf_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_zpotri.c ext/nmatrix_lapacke/lapacke/src/lapacke_zpotri_work.c 
ext/nmatrix_lapacke/lapacke/src/lapacke_zpotrs.c ext/nmatrix_lapacke/lapacke/src/lapacke_zpotrs_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_cgeqrf.c ext/nmatrix_lapacke/lapacke/src/lapacke_cgeqrf_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_cunmqr.c ext/nmatrix_lapacke/lapacke/src/lapacke_cunmqr_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_dgeqrf.c ext/nmatrix_lapacke/lapacke/src/lapacke_dgeqrf_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_dormqr.c ext/nmatrix_lapacke/lapacke/src/lapacke_dormqr_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_sgeqrf.c ext/nmatrix_lapacke/lapacke/src/lapacke_sgeqrf_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_sormqr.c ext/nmatrix_lapacke/lapacke/src/lapacke_sormqr_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_zgeqrf.c ext/nmatrix_lapacke/lapacke/src/lapacke_zgeqrf_work.c ext/nmatrix_lapacke/lapacke/src/lapacke_zunmqr.c ext/nmatrix_lapacke/lapacke/src/lapacke_zunmqr_work.c ext/nmatrix_lapacke/lapacke/utils/lapacke_c_nancheck.c ext/nmatrix_lapacke/lapacke/utils/lapacke_d_nancheck.c ext/nmatrix_lapacke/lapacke/utils/lapacke_s_nancheck.c ext/nmatrix_lapacke/lapacke/utils/lapacke_z_nancheck.c ext/nmatrix_lapacke/lapacke/utils/lapacke_cge_nancheck.c ext/nmatrix_lapacke/lapacke/utils/lapacke_cge_trans.c ext/nmatrix_lapacke/lapacke/utils/lapacke_cpo_nancheck.c ext/nmatrix_lapacke/lapacke/utils/lapacke_cpo_trans.c ext/nmatrix_lapacke/lapacke/utils/lapacke_ctr_nancheck.c ext/nmatrix_lapacke/lapacke/utils/lapacke_ctr_trans.c ext/nmatrix_lapacke/lapacke/utils/lapacke_dge_nancheck.c ext/nmatrix_lapacke/lapacke/utils/lapacke_dge_trans.c ext/nmatrix_lapacke/lapacke/utils/lapacke_dpo_nancheck.c ext/nmatrix_lapacke/lapacke/utils/lapacke_dpo_trans.c ext/nmatrix_lapacke/lapacke/utils/lapacke_dtr_nancheck.c ext/nmatrix_lapacke/lapacke/utils/lapacke_dtr_trans.c ext/nmatrix_lapacke/lapacke/utils/lapacke_lsame.c ext/nmatrix_lapacke/lapacke/utils/lapacke_sge_nancheck.c ext/nmatrix_lapacke/lapacke/utils/lapacke_sge_trans.c 
ext/nmatrix_lapacke/lapacke/utils/lapacke_spo_nancheck.c ext/nmatrix_lapacke/lapacke/utils/lapacke_spo_trans.c ext/nmatrix_lapacke/lapacke/utils/lapacke_str_nancheck.c ext/nmatrix_lapacke/lapacke/utils/lapacke_str_trans.c ext/nmatrix_lapacke/lapacke/utils/lapacke_xerbla.c ext/nmatrix_lapacke/lapacke/utils/lapacke_zge_nancheck.c ext/nmatrix_lapacke/lapacke/utils/lapacke_zge_trans.c ext/nmatrix_lapacke/lapacke/utils/lapacke_zpo_nancheck.c ext/nmatrix_lapacke/lapacke/utils/lapacke_zpo_trans.c ext/nmatrix_lapacke/lapacke/utils/lapacke_ztr_nancheck.c ext/nmatrix_lapacke/lapacke/utils/lapacke_ztr_trans.c ext/nmatrix_lapacke/lapacke_nmatrix.h ext/nmatrix_lapacke/make_lapacke_cpp.rb ext/nmatrix_lapacke/math_lapacke.cpp ext/nmatrix_lapacke/math_lapacke/cblas_local.h ext/nmatrix_lapacke/math_lapacke/cblas_templates_lapacke.h ext/nmatrix_lapacke/math_lapacke/lapacke_templates.h ext/nmatrix_lapacke/nmatrix_lapacke.cpp ext/nmatrix/data/complex.h ext/nmatrix/data/data.cpp ext/nmatrix/data/data.h ext/nmatrix/data/meta.h ext/nmatrix/data/ruby_object.h ext/nmatrix/storage/common.cpp ext/nmatrix/storage/common.h ext/nmatrix/storage/storage.cpp ext/nmatrix/storage/storage.h ext/nmatrix/storage/dense/dense.cpp ext/nmatrix/storage/dense/dense.h ext/nmatrix/storage/list/list.cpp ext/nmatrix/storage/list/list.h ext/nmatrix/storage/yale/yale.cpp ext/nmatrix/storage/yale/yale.h ext/nmatrix/storage/yale/class.h ext/nmatrix/storage/yale/iterators/base.h ext/nmatrix/storage/yale/iterators/iterator.h ext/nmatrix/storage/yale/iterators/row.h ext/nmatrix/storage/yale/iterators/row_stored.h ext/nmatrix/storage/yale/iterators/row_stored_nd.h ext/nmatrix/storage/yale/iterators/stored_diagonal.h ext/nmatrix/storage/yale/math/transpose.h ext/nmatrix/util/io.cpp ext/nmatrix/util/io.h ext/nmatrix/util/sl_list.cpp ext/nmatrix/util/sl_list.h ext/nmatrix/util/util.h ext/nmatrix/math.cpp ext/nmatrix/math/asum.h ext/nmatrix/math/gemm.h ext/nmatrix/math/gemv.h ext/nmatrix/math/getrf.h 
ext/nmatrix/math/getrs.h ext/nmatrix/math/imax.h ext/nmatrix/math/laswp.h ext/nmatrix/math/long_dtype.h ext/nmatrix/math/math.h ext/nmatrix/math/nrm2.h ext/nmatrix/math/rot.h ext/nmatrix/math/rotg.h ext/nmatrix/math/scal.h ext/nmatrix/math/trsm.h ext/nmatrix/nmatrix.cpp ext/nmatrix/nmatrix.h ext/nmatrix/ruby_constants.cpp ext/nmatrix/ruby_constants.h ext/nmatrix/ruby_nmatrix.c ext/nmatrix/types.h ext/nmatrix/nm_memory.h ext/nmatrix/extconf.rb ext/nmatrix_fftw/extconf.rb ext/nmatrix_fftw/nmatrix_fftw.cpp ================================================ FILE: README.rdoc ================================================ = NMatrix {Join the chat at https://gitter.im/SciRuby/nmatrix}[https://gitter.im/SciRuby/nmatrix?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge] Fast Numerical Linear Algebra Library for Ruby * {sciruby.com}[http://sciruby.com] * {Google+}[https://plus.google.com/109304769076178160953/posts] * {Google Group - Mailing List}[https://groups.google.com/forum/#!forum/sciruby-dev] * {NMatrix Installation wiki}[https://github.com/SciRuby/nmatrix/wiki/Installation] * {SciRuby Installation guide}[http://sciruby.com/docs#installation] {}[https://travis-ci.org/SciRuby/nmatrix] {}[https://codeclimate.com/github/SciRuby/nmatrix] == Description NMatrix is a fast numerical linear algebra library for Ruby, with dense and sparse matrices, written mostly in C and C++ (and with experimental JRuby support). It is part of the SciRuby project. NMatrix was inspired by {NArray}[http://narray.rubyforge.org], by Masahiro Tanaka. Several gems are provided in this repository: * +nmatrix+ * +nmatrix-java+ * +nmatrix-atlas+ * +nmatrix-lapacke+ * +nmatrix-fftw+ == Installation To install the latest stable version: gem install nmatrix NMatrix was originally written in C/C++, but an experimental JRuby version is also included (see instructions below for JRuby). 
For the MRI (C/C++) version, you need: * Ruby 2.0 or later * a compiler supporting C++11 (clang or GCC) To install the +nmatrix-atlas+ or +nmatrix-lapacke+ extensions, an additional requirement is a compatible LAPACK library. Detailed directions for this step can be found {here}[https://github.com/SciRuby/nmatrix/wiki/Installation]. If you want to obtain the latest (development) code, you should generally do: git clone https://github.com/SciRuby/nmatrix.git cd nmatrix/ gem install bundler bundle install bundle exec rake compile bundle exec rake spec If you want to try out the code without installing: bundle exec rake pry To install: bundle exec rake install === JRuby First, you need to download Apache Commons Math 3.6.1 (the JAR, which you can find in the binary package). For example, in the NMatrix directory, do: wget https://www.apache.org/dist/commons/math/binaries/commons-math3-3.6.1-bin.tar.gz tar zxvf commons-math3-3.6.1-bin.tar.gz mkdir ext/nmatrix_java/vendor/ cp commons-math3-3.6.1/commons-math3-3.6.1.jar ext/nmatrix_java/vendor/ Next, create build directories: mkdir -p ext/nmatrix_java/build/class mkdir ext/nmatrix_java/target Finally, compile and package as jar. rake jruby === Plugins The commands above build and install only the core +nmatrix+ gem. If you want to build one or more of the plugin gems (+nmatrix-atlas+, +nmatrix-lapacke+) in addition to the core nmatrix gem, use the nmatrix_plugins= option, e.g. rake compile nmatrix_plugins=all, rake install nmatrix_plugins=atlas, rake clean nmatrix_plugins=atlas,lapacke. Each of these commands apply to the +nmatrix+ gem and any additional plugin gems specified. For example, rake spec nmatrix_plugins=atlas will test both the core +nmatrix+ gem and the +nmatrix-atlas+ gem. 
=== Upgrading from NMatrix 0.1.0 If your code requires features provided by ATLAS (Cholesky decomposition, singular value decomposition, eigenvalues/eigenvectors, inverses of matrices bigger than 3-by-3), your code now depends on the +nmatrix-atlas+ gem. You will need to add this a dependency of your project and require 'nmatrix/atlas' in addition to require 'nmatrix'. In most cases, no further changes should be necessary, however there have been a few {API changes}[https://github.com/SciRuby/nmatrix/wiki/API-Changes], please check to see if these affect you. == Documentation If you have a suggestion or want to add documentation for any class or method in NMatrix, please open an issue or send a pull request with the changes. You can find the complete API documentation {on our website}[http://sciruby.com/nmatrix/docs/]. == Examples Create a new NMatrix from a ruby Array: >> require 'nmatrix' >> NMatrix.new([2, 3], [0, 1, 2, 3, 4, 5], dtype: :int64) => [ [0, 1, 2], [3, 4, 5] ] Create a new NMatrix using the +N+ shortcut: >> m = N[ [2, 3, 4], [7, 8, 9] ] => [ [2, 3, 4], [7, 8, 9] ] >> m.inspect => # The above output requires that you have a pretty-print-enabled console such as Pry; otherwise, you'll see the output given by +inspect+. If you want to learn more about how to create a matrix, {read the guide in our wiki}[https://github.com/SciRuby/nmatrix/wiki/How-to-create-an-NMatrix]. Again, you can find the complete API documentation {on our website}[http://sciruby.com/nmatrix/docs/]. === Using advanced features provided by plugins Certain features (see the documentation) require either the nmatrix-atlas or the nmatrix-lapacke gem to be installed. These can be accessed by using require 'nmatrix/atlas' or require 'nmatrix/lapacke'. If you don't care which of the two gems is installed, use require 'nmatrix/lapack_plugin', which will require whichever one of the two is available. 
Fast fourier transforms can be conducted with the nmatrix-fftw extension, which is an interface to the FFTW C library. Use require 'nmatrix/fftw' for using this plugin. == Plugin details === ATLAS and LAPACKE The +nmatrix-atlas+ and +nmatrix-lapacke+ gems are optional extensions of the main +nmatrix+ gem that rely on external linear algebra libraries to provide advanced features for dense matrices (singular value decomposition, eigenvalue/eigenvector finding, Cholesky factorization), as well as providing faster implementations of common operations like multiplication, inverses, and determinants. +nmatrix-atlas+ requires the {ATLAS library}[http://math-atlas.sourceforge.net/], while +nmatrix-lapacke+ is designed to work with various LAPACK implementations (including ATLAS). The +nmatrix-atlas+ and +nmatrix-lapacke+ gems both provide similar interfaces for using these advanced features. === *FFTW* This is plugin for interfacing with the {FFTW library}[http://www.fftw.org]. It has been tested with FFTW 3.3.4. It works reliably only with 64 bit numbers (or the NMatrix `:float64` or `:complex128` data type). See the docs for more details. == NArray compatibility When NArray[http://masa16.github.io/narray/] is installed alongside NMatrix, require 'nmatrix' will inadvertently load NArray's +lib/nmatrix.rb+ file, usually accompanied by the following error: uninitialized constant NArray (NameError) To make sure NMatrix is loaded properly in the presence of NArray, use require 'nmatrix/nmatrix' instead of require 'nmatrix' in your code. == Developers Read the instructions in +CONTRIBUTING.md+ if you want to help NMatrix. 
== Features The following features exist in the current version of NMatrix (0.1.0.rc1): * Matrix and vector storage containers: dense, yale, list (more to come) * Data types: byte (uint8), int8, int16, int32, int64, float32, float64, complex64, complex128, Ruby object * Interconversion between storage and data types * Element-wise and right-hand-scalar operations and comparisons for all matrix types * Matrix-matrix multiplication for dense (with and without ATLAS) and yale * Matrix-vector multiplication for dense (with and without ATLAS) * Lots of enumerators (each, each_with_indices, each_row, each_column, each_rank, map, etc.) * Matrix slicing by copy and reference (for dense, yale, and list) * Native reading and writing of dense and yale matrices * Optional compression for dense matrices with symmetry or triangularity: symmetric, skew, hermitian, upper, lower * Input/output: * Matlab .MAT v5 file input * MatrixMarket file input/output * Harwell-Boeing and Fortran file input * Point Cloud Library PCD file input * C and C++ API * BLAS internal implementations (no library) and external (with nmatrix-lapacke or nmatrix-atlas) access: * Level 1: xROT, xROTG (BLAS dtypes only), xASUM, xNRM2, IxAMAX, xSCAL * Level 2: xGEMV * Level 3: xGEMM, xTRSM * LAPACK access (with nmatrix-lapacke or nmatrix-atlas plugin): * xGETRF, xGETRI, xGETRS, xGESV (Gaussian elimination) * xPOTRF, xPOTRI, xPOTRS, xPOSV (Cholesky factorization) * xGESVD, xGESDD (singular value decomposition) * xGEEV (eigenvalue decomposition of asymmetric square matrices) * LAPACK-less internal implementations (no plugin or LAPACK needed and working on non-BLAS dtypes): * xGETRF, xGETRS * LU decomposition * Matrix inversions * Determinant calculation for BLAS dtypes * Traces * Ruby/GSL interoperability (requires {SciRuby's fork of rb-gsl}[http://github.com/SciRuby/rb-gsl]) * slice assignments, e.g., x[1..3,0..4] = some_other_matrix === Planned features (Short-to-Medium Term) See the issues tracker for a list of 
planned features or to request new ones. == License Copyright (c) 2012--17, John Woods and the Ruby Science Foundation. All rights reserved. NMatrix, along with SciRuby, is licensed under the BSD 2-clause license. See {LICENSE.txt}[https://github.com/SciRuby/sciruby/wiki/License] for details. == Donations Support a SciRuby Fellow: {}[http://www.pledgie.com/campaigns/15783] ================================================ FILE: Rakefile ================================================ # -*- ruby -*- require 'rubygems' require 'rubygems/package_task' require 'bundler' #Specify plugins to build on the command line like: #rake whatever nmatrix_plugins=atlas,lapacke #or #rake whatever nmatrix_plugins=all #If you want to build *only* plugins and not the core nmatrix gem: #rake whatever nmatrix_plugins=all nmatrix_core=false if ENV["nmatrix_plugins"] == "all" gemspecs = Dir["*.gemspec"] else plugins = [] plugins = ENV["nmatrix_plugins"].split(",") if ENV["nmatrix_plugins"] gemspecs = ["nmatrix.gemspec"] #always include the main nmatrix gem plugins.each do |plugin| gemspecs << "nmatrix-#{plugin}.gemspec" end end if ENV["nmatrix_core"] == "false" gemspecs -= ["nmatrix.gemspec"] end gemspecs.map! { |gemspec| eval(IO.read(gemspec)) } begin Bundler.setup(:default, :development) rescue Bundler::BundlerError => e $stderr.puts e.message $stderr.puts "Run `bundle install` to install missing gems" exit e.status_code end desc "Build and install into system gems." 
task :install => :repackage do gemspecs.each do |gemspec| gem_file = "pkg/#{gemspec.name}-#{gemspec.version}.gem" system "gem install '#{gem_file}'" end end require 'rake' require "rake/extensiontask" gemspecs.each do |gemspec| next unless gemspec.extensions gemspec.extensions.each do |extconf| ext_name = extconf.match(/ext\/(.*)\/extconf\.rb/)[1] Rake::ExtensionTask.new do |ext| ext.name = ext_name ext.ext_dir = "ext/#{ext_name}" ext.lib_dir = 'lib/' ext.source_pattern = "**/*.{c,cpp,h}" end end end gemspecs.each do |gemspec| Gem::PackageTask.new(gemspec).define end require 'rspec/core/rake_task' require 'rspec/core' namespace :spec do #We need a separate rake task for each plugin, rather than one big task that #runs all of the specs. This is because there's no way to tell rspec #to run the specs in a certain order with (say) "nmatrix/atlas" require'd #for some of the specs, but not for others, without splitting them up like #this. spec_tasks = [] gemspecs.each do |gemspec| test_files = gemspec.test_files test_files.keep_if { |file| file =~ /_spec\.rb$/ } test_files -= ['spec/nmatrix_yale_spec.rb', 'spec/blas_spec.rb', 'spec/lapack_core_spec.rb'] if /java/ === RUBY_PLATFORM next if test_files.empty? 
spec_tasks << gemspec.name RSpec::Core::RakeTask.new(gemspec.name) do |spec| spec.pattern = FileList.new(test_files) end end task :all => spec_tasks end task :spec => "spec:all" BASEDIR = Pathname( __FILE__ ).dirname.relative_path_from( Pathname.pwd ) SPECDIR = BASEDIR + 'spec' VALGRIND_OPTIONS = [ "--tool=memcheck", #"--leak-check=yes", "--num-callers=15", #"--error-limit=no", "--partial-loads-ok=yes", "--undef-value-errors=no" #, #"--dsymutil=yes" ] CALLGRIND_OPTIONS = [ "--tool=callgrind", "--dump-instr=yes", "--simulate-cache=yes", "--collect-jumps=yes" ] VALGRIND_MEMORYFILL_OPTIONS = [ "--freelist-vol=100000000", "--malloc-fill=6D", "--free-fill=66 ", ] GDB_OPTIONS = [] task :console do |task| cmd = [ 'irb', "-r './lib/nmatrix.rb'" ] run *cmd end task :pry do |task| cmd = [ 'pry', "-r './lib/nmatrix.rb'" ] run *cmd end namespace :pry do task :valgrind => [ :compile ] do |task| cmd = [ 'valgrind' ] + VALGRIND_OPTIONS cmd += ['ruby', '-Ilib:ext', "-r './lib/nmatrix.rb'", "-r 'pry'", "-e 'binding.pry'"] run *cmd end end namespace :console do CONSOLE_CMD = ['irb', "-r './lib/nmatrix.rb'"] desc "Run console under GDB." task :gdb => [ :compile ] do |task| cmd = [ 'gdb' ] + GDB_OPTIONS cmd += [ '--args' ] cmd += CONSOLE_CMD run( *cmd ) end desc "Run console under Valgrind." task :valgrind => [ :compile ] do |task| cmd = [ 'valgrind' ] + VALGRIND_OPTIONS cmd += CONSOLE_CMD run( *cmd ) end end task :default => :spec def run *cmd sh(cmd.join(" ")) end namespace :spec do # partial-loads-ok and undef-value-errors necessary to ignore # spurious (and eminently ignorable) warnings from the ruby # interpreter RSPEC_CMD = [ 'ruby', '-S', 'rspec', '-Ilib:ext', SPECDIR.to_s ] #desc "Run the spec for generator.rb" #task :generator do |task| # run 'rspec spec/generator_spec.rb' #end desc "Run specs under GDB." task :gdb => [ :compile ] do |task| cmd = [ 'gdb' ] + GDB_OPTIONS cmd += [ '--args' ] cmd += RSPEC_CMD run( *cmd ) end desc "Run specs under cgdb." 
task :cgdb => [ :compile ] do |task| cmd = [ 'cgdb' ] + GDB_OPTIONS cmd += [ '--args' ] cmd += RSPEC_CMD run( *cmd ) end desc "Run specs under Valgrind." task :valgrind => [ :compile ] do |task| cmd = [ 'valgrind' ] + VALGRIND_OPTIONS cmd += RSPEC_CMD run( *cmd ) end desc "Run specs under Callgrind." task :callgrind => [ :compile ] do |task| cmd = [ 'valgrind' ] + CALLGRIND_OPTIONS cmd += RSPEC_CMD run( *cmd ) end end LEAKCHECK_CMD = [ 'ruby', '-Ilib:ext', "#{SPECDIR}/leakcheck.rb" ] desc "Run leakcheck script." task :leakcheck => [ :compile ] do |task| cmd = [ 'valgrind' ] + VALGRIND_OPTIONS cmd += LEAKCHECK_CMD run( *cmd ) end namespace :clean do #the generated Makefile doesn't have a soclean target, should this be removed? task :so do |task| gemspecs.each do |gemspec| next unless gemspec.extensions gemspec.extensions.each do |extconf| ext_name = extconf.match(/ext\/(.*)\/extconf\.rb/)[1] tmp_path = "tmp/#{RUBY_PLATFORM}/#{ext_name}/#{RUBY_VERSION}" chdir tmp_path do if RUBY_PLATFORM =~ /mswin/ `nmake soclean` else mkcmd = ENV['MAKE'] || %w[gmake make].find { |c| system("#{c} -v >> /dev/null 2>&1") } `#{mkcmd} soclean` end end end end end end desc "Check the manifest for correctness" task :check_manifest do |task| manifest_files = File.read("Manifest.txt").split git_files = `git ls-files |grep -v 'spec/'`.split ignore_files = %w{.gitignore .rspec ext/nmatrix/binary_format.txt scripts/ttable_helper.rb} possible_files = git_files - ignore_files missing_files = possible_files - manifest_files extra_files = manifest_files - possible_files unless missing_files.empty? STDERR.puts "The following files are in the git repo but not the Manifest:" missing_files.each { |f| STDERR.puts " -- #{f}"} end unless extra_files.empty? STDERR.puts "The following files are in the Manifest but may not be necessary:" extra_files.each { |f| STDERR.puts " -- #{f}"} end if extra_files.empty? && missing_files.empty? STDERR.puts "Manifest looks good!" 
end end require "rdoc/task" #separate out docs for plugins? RDoc::Task.new do |rdoc| rdoc.main = "README.rdoc" rdoc.rdoc_files.include(%w{README.rdoc History.txt LICENSE.txt CONTRIBUTING.md lib ext}) rdoc.options << "--exclude=ext/nmatrix/extconf.rb" rdoc.options << "--exclude=ext/nmatrix_atlas/extconf.rb" rdoc.options << "--exclude=ext/nmatrix/ttable_helper.rb" rdoc.options << "--exclude=lib/nmatrix/rspec.rb" end # jruby tasks namespace :jruby do PROJECT_DIR = File.expand_path(".",Dir.pwd) BUILD_DIR = "build" CLASSES_DIR = "../build/classes" TEST_CLASSES_DIR = "build/testClasses" JRUBY_DIR = "#{PROJECT_DIR}/ext/nmatrix_java" VENDOR_DIR = "#{JRUBY_DIR}/vendor" TARGET_DIR = "#{JRUBY_DIR}/target" jars = Dir["#{VENDOR_DIR}/*.jar"] desc 'Compile java classes' task :javac do unless RUBY_PLATFORM == 'java' abort 'Please run with JRuby' end sh "mkdir -p #{JRUBY_DIR}/build/classes" Dir.chdir("#{JRUBY_DIR}/nmatrix") classes = Dir['**/*.java'] sh "javac -classpath #{jars.join(':')} -d #{CLASSES_DIR} #{classes.join(' ')}" end desc 'Package java classes in a jar file' task :jar do unless RUBY_PLATFORM == 'java' abort 'Please run with JRuby' end sh "mkdir -p #{TARGET_DIR}" Dir.chdir("#{JRUBY_DIR}/build/classes") classes = Dir['**/*.class'] sh "jar -cf #{TARGET_DIR}/nmatrix.jar #{classes.join(' ')}" end task :all => [:javac, :jar] end desc "Compile java classes and Package them in a jar file" task :jruby => 'jruby:all' namespace :travis do task :env do if /java/ === RUBY_PLATFORM puts "Building for jruby" sh "mkdir ext/nmatrix_java/vendor" puts "Downloading tar file." sh "wget http://www-eu.apache.org/dist//commons/math/binaries/commons-math3-3.6.1-bin.tar.gz" puts "Unzipping tar file." sh "tar -zxf commons-math3-3.6.1-bin.tar.gz" puts "Deleting tar file." 
sh "rm commons-math3-3.6.1-bin.tar.gz" sh "cp -r commons-math3-3.6.1/commons-math3-3.6.1.jar ext/nmatrix_java/vendor" else puts "\n# Build environment:" %w[ CC CXX USE_ATLAS USE_OPENBLAS USE_REF NO_EXTERNAL_LIB TRAVIS_OS_NAME TRAVIS_BRANCH TRAVIS_COMMIT TRAVIS_PULL_REQUEST ].each do |name| puts "- #{name}: #{ENV[name]}" end require 'rbconfig' puts "\n# RbConfig::MAKEFILE_CONFIG values:" %w[ CC CXX CPPFLAGS CFLAGS CXXFLAGS ].each do |name| puts "- #{name}: #{RbConfig::MAKEFILE_CONFIG[name]}" end cc = RbConfig::MAKEFILE_CONFIG['CC'] puts "\n$ #{cc} -v\n#{`#{cc} -v 2>&1`}" end end end # vim: syntax=ruby ================================================ FILE: ext/nmatrix/binary_format.txt ================================================ This is the proposed binary format for saving and loading NMatrix objects. Order is little-endian. List matrices should be converted to dense or yale matrices. There should be no serious need to load or save linked-list matrices, since these exist primarily in order to construct efficient yale matrices. First 64-bit block: * ui16 major (version) * ui16 minor * ui16 release * i16 NULL Second 64-bit block: * ui8 dtype * ui8 stype * ui8 itype # ui32 for dense * ui8 symm * i16 NULL * ui16 dim # if 1, NVector; otherwise, NMatrix 3rd - nth 64-bit block: shape itype sets the number of bytes allocated for each shape entry. Since only yale uses itype, dense will pretty much always be the UINT32 itype (see nmatrix.h). If the total number of bytes occupied by the shape array is less than 8, the rest of the 64-bit block will be padded with zeros. (n+1)th 64-bit block: depends on stype, symm symm is designed to reduce file size by allowing us to not save certain elements in symmetric, hermitian, skew- symmetric, and triangular matrices. These values will be defined in nmatrix.h; 0 indicates standard (no symmetry). In later versions, additional patterns may be defined which might even have less to do with symmetry than upper/lower do. 
When storing a symmetric matrix, we will only store the upper portion. If the matrix is lower triangular, only the lower portion will be stored. For dense, we simply store the contents of the matrix exactly as in memory (or just the upper-triangular part if symm is set). For yale, we store: * ui32 ndnz * ui32 length (AKA size, the number of elements in A/IJA that aren't nil/undefined) The latter will serve as the capacity when we read a Yale matrix. Then we store the a array, again padding with zeros so it's a multiple of 8 bytes. Then we store the ija array, padding with zeros so it's a multiple of 8 bytes. ================================================ FILE: ext/nmatrix/data/complex.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == complex.h // // Functions and classes for dealing with complex numbers. #ifndef COMPLEX_H #define COMPLEX_H /* * Standard Includes */ #include #include #include #include /* * Project Includes */ #include "types.h" /* * Macros */ /* * Types */ namespace nm { class RubyObject; template class Complex; typedef Complex Complex64; typedef Complex Complex128; /* * Data */ /* * Classes and Functions */ template class Complex { public: // The real and immaginary parts of the complex number. 
Type r; Type i; /* * Default constructor. */ inline Complex(Type real = 0, Type imaginary = 0) : r(real), i(imaginary) {} /* * Copy constructors. */ template explicit inline Complex(const Complex& other) : r(other.r), i(other.i) {} template inline Complex& operator=(const Complex& other) { this->r = static_cast(other.r); this->i = static_cast(other.i); return *this; } explicit Complex(const RubyObject& other); Complex& operator=(const RubyObject& other); template inline Complex& operator=(const OtherType& real) { this->r = Type(real); this->i = Type(0); return *this; } /* * Complex conjugate function -- creates a copy, but inverted. */ inline Complex conjugate() const { return Complex(this->r, -(this->i)); } /* * Complex inverse function -- creates a copy, but inverted. * * FIXME: Check that this doesn't duplicate functionality of NativeType / Complex */ inline Complex inverse() const { Complex conj = conjugate(); Type denom = this->r * this->r + this->i * this->i; return Complex(conj.r / denom, conj.i / denom); } // Negative operator inline Complex operator-() const { return Complex(-this->r, -this->i); } /* * Binary operator definitions for various types. 
*/ //////////////////////////////// // Complex-Complex Operations // //////////////////////////////// template inline Complex operator+(const Complex& other) const { return Complex(this->r + other.r, this->i + other.i); } template inline Complex& operator+=(const Complex& other) { this->r += other.r; this->i += other.i; return *this; } template inline Complex& operator-=(const Complex& other) { this->r -= other.r; this->i -= other.i; return *this; } template inline Complex operator-(const Complex& other) const { return Complex(this->r - other.r, this->i - other.i); } template inline Complex operator*(const Complex& other) const { return Complex(this->r * other.r - this->i * other.i, this->r * other.i + this->i * other.r); } template inline Complex& operator*=(const Complex& other) { this->r = this->r * other.r - this->i * other.i; this->i = this->r * other.i + this->i * other.r; return *this; } template inline Complex operator/(const Complex& other) const { Type new_r, new_i; Type denom = other.i * other.i + other.r * other.r; new_r = (this->r * other.r + this->i * other.i) / denom; new_i = (this->i * other.r - this->r * other.i) / denom; return Complex(new_r, new_i); } template inline Complex operator/=(const Complex& other) { Type new_r, new_i; Type denom = other.i * other.i + other.r * other.r; new_r = (this->r * other.r + this->i * other.i) / denom; new_i = (this->i * other.r - this->r * other.i) / denom; this->r = new_r; this->i = new_i; return *this; } template inline bool operator<(const Complex& other) const { return (this->r < other.r) || ((this->r <= other.r) && (this->i < other.i)); } template inline bool operator>(const Complex& other) const { return (this->r > other.r) || ((this->r >= other.r) && (this->i > other.i)); } template inline bool operator==(const Complex& other) const { return FP_EQUAL(this->r, other.r) && FP_EQUAL(this->i, other.i); } template inline bool operator!=(const Complex& other) const { return !(*this == other); } template inline 
bool operator<=(const Complex& other) const { return (*this < other) || (*this == other); } template inline bool operator>=(const Complex& other) const { return (*this > other) || (*this == other); } template inline operator Complex () const { return Complex((OtherType)this->r, (OtherType)this->i); } /////////////////////////////// // Complex-Native Operations // /////////////////////////////// template ::value>::type> inline Complex operator+(const NativeType& other) const { return *this + Complex(other); } template ::value>::type> inline Complex operator-(const NativeType& other) const { return *this - Complex(other); } template ::value>::type> inline Complex operator*(const NativeType& other) const { return *this * Complex(other); } template ::value>::type> inline Complex operator/(const NativeType& other) const { return *this / Complex(other); } template ::value>::type> inline bool operator<(const NativeType& other) const { return *this < Complex(other); } template ::value>::type> inline bool operator>(const NativeType& other) const { return *this > Complex(other); } template ::value>::type> inline bool operator==(const NativeType& other) const { return *this == Complex(other); } template ::value>::type> inline bool operator!=(const NativeType& other) const { return *this != Complex(other); } template ::value>::type> inline bool operator<=(const NativeType& other) const { return *this <= Complex(other); } template ::value>::type> inline bool operator>=(const NativeType& other) const { return *this >= Complex(other); } template ::value>::type> inline operator NativeType () const { return (NativeType)this->r; } operator RubyObject () const; }; /////////////////////////////// // Native-Complex Operations // /////////////////////////////// template ::value>::type> inline Complex operator+(const NativeType& left, const Complex& right) { return Complex(left) + right; } template ::value>::type> inline Complex operator-(const NativeType& left, const Complex& right) { 
return Complex(left) - right; } template ::value>::type> inline Complex operator*(const NativeType& left, const Complex& right) { return Complex(left) * right; } template ::value>::type> inline Complex operator/(const NativeType& left, const Complex& right) { return Complex(left) / right; } template ::value>::type> inline bool operator<(const NativeType left, const Complex& right) { return Complex(left) < right; } template ::value>::type> inline bool operator>(const NativeType left, const Complex& right) { return Complex(left) > right; } template ::value>::type> inline bool operator==(const NativeType left, const Complex& right) { return Complex(left) == right; } template ::value>::type> inline bool operator!=(const NativeType left, const Complex& right) { return Complex(left) != right; } template ::value>::type> inline bool operator<=(const NativeType left, const Complex& right) { return Complex(left) <= right; } template ::value>::type> inline bool operator>=(const NativeType left, const Complex& right) { return Complex(left) >= right; } template inline std::ostream& operator<<(std::ostream& out, const Complex& rhs) { out << "(" << rhs.r << "," << rhs.i << "i)" << std::flush; return out; } // Negative operator template ::value>::type> inline Complex operator-(const Complex& rhs) { return Complex(-rhs.r, -rhs.i); } } // end of namespace nm namespace std { template ::value>::type> nm::Complex piecewise_abs(const nm::Complex& value) { return nm::Complex(value.r < 0 ? -value.r : value.r, value.i < 0 ? -value.i : value.i); } template ::value>::type> nm::Complex real_abs(const nm::Complex& value) { return nm::Complex(value.r < 0 ? -value.r : value.r, value.i); } template ::value>::type> nm::Complex imag_abs(const nm::Complex& value) { return nm::Complex(value.r, value.i < 0 ? 
-value.i : value.i); } template ::value>::type> double abs(const nm::Complex& value) { return std::sqrt(double(value.r)*double(value.r) + double(value.i)*double(value.i)); } } #endif // COMPLEX_H ================================================ FILE: ext/nmatrix/data/data.cpp ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == data.cpp // // Functions and data for dealing the data types. /* * Standard Includes */ #include #include /* * Project Includes */ #include "types.h" #include "data.h" /* * Global Variables */ namespace nm { const char* const EWOP_OPS[nm::NUM_EWOPS] = { "+", "-", "*", "/", "**", "%", "==", "!=", "<", ">", "<=", ">=" }; const std::string EWOP_NAMES[nm::NUM_EWOPS] = { "add", "sub", "mul", "div", "pow", "mod", "eqeq", "neq", "lt", "gt", "leq", "geq" }; const std::string NONCOM_EWOP_NAMES[nm::NUM_NONCOM_EWOPS] = { "atan2", "ldexp", "hypot" }; const std::string UNARYOPS[nm::NUM_UNARYOPS] = { "sin", "cos", "tan", "asin", "acos", "atan", "sinh", "cosh", "tanh", "asinh", "acosh", "atanh", "exp", "log2", "log10", "sqrt", "erf", "erfc", "cbrt", "gamma", "negate", "floor", "ceil", "round" }; /* * Create a RubyObject from a regular C value (given a dtype). Does not return a VALUE! 
To get a VALUE, you need to * look at the rval property of what this function returns. */ nm::RubyObject rubyobj_from_cval(void* val, nm::dtype_t dtype) { using namespace nm; switch (dtype) { case BYTE: return RubyObject(*reinterpret_cast(val)); case INT8: return RubyObject(*reinterpret_cast(val)); case INT16: return RubyObject(*reinterpret_cast(val)); case INT32: return RubyObject(*reinterpret_cast(val)); case INT64: return RubyObject(*reinterpret_cast(val)); case FLOAT32: return RubyObject(*reinterpret_cast(val)); case FLOAT64: return RubyObject(*reinterpret_cast(val)); case COMPLEX64: return RubyObject(*reinterpret_cast(val)); case COMPLEX128: return RubyObject(*reinterpret_cast(val)); default: try { throw std::logic_error("Cannot create ruby object"); } catch (std::logic_error err) { printf("%s\n", err.what()); } rb_raise(nm_eDataTypeError, "Conversion to RubyObject requested from unknown/invalid data type (did you try to convert from a VALUE?)"); } return Qnil; } } // end of namespace nm extern "C" { const char* const DTYPE_NAMES[nm::NUM_DTYPES] = { "byte", "int8", "int16", "int32", "int64", "float32", "float64", "complex64", "complex128", "object" }; const size_t DTYPE_SIZES[nm::NUM_DTYPES] = { sizeof(uint8_t), sizeof(int8_t), sizeof(int16_t), sizeof(int32_t), sizeof(int64_t), sizeof(float32_t), sizeof(float64_t), sizeof(nm::Complex64), sizeof(nm::Complex128), sizeof(nm::RubyObject) }; const nm::dtype_t Upcast[nm::NUM_DTYPES][nm::NUM_DTYPES] = { { nm::BYTE, nm::INT16, nm::INT16, nm::INT32, nm::INT64, nm::FLOAT32, nm::FLOAT64, nm::COMPLEX64, nm::COMPLEX128, nm::RUBYOBJ}, { nm::INT16, nm::INT8, nm::INT16, nm::INT32, nm::INT64, nm::FLOAT32, nm::FLOAT64, nm::COMPLEX64, nm::COMPLEX128, nm::RUBYOBJ}, { nm::INT16, nm::INT16, nm::INT16, nm::INT32, nm::INT64, nm::FLOAT32, nm::FLOAT64, nm::COMPLEX64, nm::COMPLEX128, nm::RUBYOBJ}, { nm::INT32, nm::INT32, nm::INT32, nm::INT32, nm::INT64, nm::FLOAT32, nm::FLOAT64, nm::COMPLEX64, nm::COMPLEX128, nm::RUBYOBJ}, { nm::INT64, 
nm::INT64, nm::INT64, nm::INT64, nm::INT64, nm::FLOAT32, nm::FLOAT64, nm::COMPLEX64, nm::COMPLEX128, nm::RUBYOBJ}, { nm::FLOAT32, nm::FLOAT32, nm::FLOAT32, nm::FLOAT32, nm::FLOAT32, nm::FLOAT32, nm::FLOAT64, nm::COMPLEX64, nm::COMPLEX128, nm::RUBYOBJ}, { nm::FLOAT64, nm::FLOAT64, nm::FLOAT64, nm::FLOAT64, nm::FLOAT64, nm::FLOAT64, nm::FLOAT64, nm::COMPLEX128, nm::COMPLEX128, nm::RUBYOBJ}, { nm::COMPLEX64, nm::COMPLEX64, nm::COMPLEX64, nm::COMPLEX64, nm::COMPLEX64, nm::COMPLEX64, nm::COMPLEX128, nm::COMPLEX64, nm::COMPLEX128, nm::RUBYOBJ}, { nm::COMPLEX128, nm::COMPLEX128, nm::COMPLEX128, nm::COMPLEX128, nm::COMPLEX128, nm::COMPLEX128, nm::COMPLEX128, nm::COMPLEX128, nm::COMPLEX128, nm::RUBYOBJ}, { nm::RUBYOBJ, nm::RUBYOBJ, nm::RUBYOBJ, nm::RUBYOBJ, nm::RUBYOBJ, nm::RUBYOBJ, nm::RUBYOBJ, nm::RUBYOBJ, nm::RUBYOBJ, nm::RUBYOBJ} }; /* * Forward Declarations */ /* * Functions */ /* * Converts a RubyObject */ void rubyval_to_cval(VALUE val, nm::dtype_t dtype, void* loc) { using namespace nm; switch (dtype) { case nm::BYTE: *reinterpret_cast(loc) = static_cast(RubyObject(val)); break; case nm::INT8: *reinterpret_cast(loc) = static_cast(RubyObject(val)); break; case nm::INT16: *reinterpret_cast(loc) = static_cast(RubyObject(val)); break; case nm::INT32: *reinterpret_cast(loc) = static_cast(RubyObject(val)); break; case nm::INT64: *reinterpret_cast(loc) = static_cast(RubyObject(val)); break; case nm::FLOAT32: *reinterpret_cast(loc) = static_cast(RubyObject(val)); break; case nm::FLOAT64: *reinterpret_cast(loc) = static_cast(RubyObject(val)); break; case nm::COMPLEX64: *reinterpret_cast(loc) = RubyObject(val).to(); break; case nm::COMPLEX128: *reinterpret_cast(loc) = RubyObject(val).to(); break; case RUBYOBJ: *reinterpret_cast(loc) = val; //rb_raise(rb_eTypeError, "Attempting a bad conversion from a Ruby value."); break; default: rb_raise(rb_eTypeError, "Attempting a bad conversion."); break; } } /* * Allocate and return a piece of data of the correct dtype, converted from a 
* given RubyObject. */ void* rubyobj_to_cval(VALUE val, nm::dtype_t dtype) { size_t size = DTYPE_SIZES[dtype]; NM_CONSERVATIVE(nm_register_value(&val)); void* ret_val = NM_ALLOC_N(char, size); rubyval_to_cval(val, dtype, ret_val); NM_CONSERVATIVE(nm_unregister_value(&val)); return ret_val; } void nm_init_data() { volatile VALUE t = INT2FIX(1); volatile nm::RubyObject obj(t); volatile nm::Complex64 a(const_cast(obj)); volatile nm::Complex128 b(const_cast(obj)); } } // end of extern "C" block ================================================ FILE: ext/nmatrix/data/data.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == data.h // // Header file for dealing with data types. 
#ifndef DATA_H #define DATA_H /* * Standard Includes */ #include #include /* * Project Includes */ #include "nmatrix.h" #include "types.h" #include "complex.h" #include "ruby_object.h" namespace nm { /* * Constants */ const int NUM_DTYPES = 10; const int NUM_ITYPES = 4; const int NUM_EWOPS = 12; const int NUM_UNARYOPS = 24; const int NUM_NONCOM_EWOPS = 3; enum ewop_t { EW_ADD, EW_SUB, EW_MUL, EW_DIV, EW_POW, EW_MOD, EW_EQEQ, EW_NEQ, EW_LT, EW_GT, EW_LEQ, EW_GEQ, }; enum noncom_ewop_t { NONCOM_EW_ATAN2, NONCOM_EW_LDEXP, NONCOM_EW_HYPOT }; enum unaryop_t { UNARY_SIN, UNARY_COS, UNARY_TAN, UNARY_ASIN, UNARY_ACOS, UNARY_ATAN, UNARY_SINH, UNARY_COSH, UNARY_TANH, UNARY_ASINH, UNARY_ACOSH, UNARY_ATANH, UNARY_EXP, UNARY_LOG2, UNARY_LOG10, UNARY_SQRT, UNARY_ERF, UNARY_ERFC, UNARY_CBRT, UNARY_GAMMA, UNARY_NEGATE, UNARY_FLOOR, UNARY_CEIL, UNARY_ROUND }; // element-wise and scalar operators extern const char* const EWOP_OPS[nm::NUM_EWOPS]; extern const std::string EWOP_NAMES[nm::NUM_EWOPS]; extern const std::string UNARYOPS[nm::NUM_UNARYOPS]; extern const std::string NONCOM_EWOP_NAMES[nm::NUM_NONCOM_EWOPS]; template Complex::Complex(const RubyObject& other) { *this = other; } template Complex& Complex::operator=(const RubyObject& other) { if (RB_TYPE_P(other.rval, T_COMPLEX)) { this->r = NUM2DBL(rb_funcall(other.rval, rb_intern("real"), 0)); this->i = NUM2DBL(rb_funcall(other.rval, rb_intern("imag"), 0)); } else if (RB_TYPE_P(other.rval, T_FLOAT) || RB_TYPE_P(other.rval, T_FIXNUM) || RB_TYPE_P(other.rval, T_BIGNUM)) { this->r = NUM2DBL(other.rval); this->i = 0.0; } else { rb_raise(rb_eTypeError, "not sure how to convert this type of VALUE to a complex"); } return *this; } template Complex::operator RubyObject () const { return RubyObject(*this); } nm::RubyObject rubyobj_from_cval(void* val, nm::dtype_t dtype); } // end of namespace nm /* * Macros */ #define STYPE_MARK_TABLE(name) \ static void (*(name)[nm::NUM_STYPES])(STORAGE*) = { \ nm_dense_storage_mark, \ 
nm_list_storage_mark, \ nm_yale_storage_mark \ }; #define STYPE_REGISTER_TABLE(name) \ static void (*(name)[nm::NUM_STYPES])(const STORAGE*) = { \ nm_dense_storage_register, \ nm_list_storage_register, \ nm_yale_storage_register \ }; #define STYPE_UNREGISTER_TABLE(name) \ static void (*(name)[nm::NUM_STYPES])(const STORAGE*) = { \ nm_dense_storage_unregister, \ nm_list_storage_unregister, \ nm_yale_storage_unregister \ }; #define CAST_TABLE(name) \ static STORAGE* (*(name)[nm::NUM_STYPES][nm::NUM_STYPES])(const STORAGE*, nm::dtype_t, void*) = { \ { nm_dense_storage_cast_copy, nm_dense_storage_from_list, nm_dense_storage_from_yale }, \ { nm_list_storage_from_dense, nm_list_storage_cast_copy, nm_list_storage_from_yale }, \ { nm_yale_storage_from_dense, nm_yale_storage_from_list, nm_yale_storage_cast_copy } \ }; /* * Defines a static array that hold function pointers to dtype templated * versions of the specified function. */ #define DTYPE_TEMPLATE_TABLE(fun, ret, ...) NAMED_DTYPE_TEMPLATE_TABLE(ttable, fun, ret, __VA_ARGS__) #define NAMED_DTYPE_TEMPLATE_TABLE(name, fun, ret, ...) \ static ret (*(name)[nm::NUM_DTYPES])(__VA_ARGS__) = { \ fun, \ fun, \ fun, \ fun, \ fun, \ fun, \ fun, \ fun, \ fun, \ fun \ }; #define DTYPE_OBJECT_STATIC_TABLE(obj, fun, ret, ...) \ static ret (*(ttable)[nm::NUM_DTYPES])(__VA_ARGS__) = { \ obj::fun, \ obj::fun, \ obj::fun, \ obj::fun, \ obj::fun, \ obj::fun, \ obj::fun, \ obj::fun, \ obj::fun, \ obj::fun \ }; #define NAMED_DTYPE_TEMPLATE_TABLE_NO_ROBJ(name, fun, ret, ...) \ static ret (*(name)[nm::NUM_DTYPES])(__VA_ARGS__) = { \ fun, \ fun, \ fun, \ fun, \ fun, \ fun, \ fun, \ fun, \ fun \ }; /* * Same as DTYPE_TEMPLATE_TABLE but for functions that have two template * parameters. * * The left-hand DType is used as the first index, and the right-hand side is * the second index. Not all left- and right-hand side combinations are valid, * and an invalid combination will result in a NULL pointer. 
*/ #define LR_DTYPE_TEMPLATE_TABLE(fun, ret, ...) NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, fun, ret, __VA_ARGS__) #define NAMED_LR_DTYPE_TEMPLATE_TABLE(name, fun, ret, ...) \ static ret (*(name)[nm::NUM_DTYPES][nm::NUM_DTYPES])(__VA_ARGS__) = { \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, fun}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, fun}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, fun}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, fun}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, fun}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, fun}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, fun}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, fun}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, fun}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, fun} \ }; /* * Defines a static array that holds function pointers to operation, and left- * and right-side dtype templated version sof the specified function. */ #define OP_LR_DTYPE_TEMPLATE_TABLE(fun, ret, ...) NAMED_OP_LR_DTYPE_TEMPLATE_TABLE(ttable, fun, ret, __VA_ARGS__) #define NAMED_OP_LR_DTYPE_TEMPLATE_TABLE(name, fun, ret, ...) 
\ static ret (*(name)[nm::NUM_EWOPS][nm::NUM_DTYPES][nm::NUM_DTYPES])(__VA_ARGS__) = { \ { \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, \ fun, fun, fun, fun, \ fun, \ NULL}, \ \ {fun, fun, fun, fun, \ fun, fun, fun, fun, \ fun, \ NULL}, \ {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, fun} \ }, \ \ { \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, \ fun, fun, fun, fun, \ fun, \ NULL}, \ \ {fun, fun, fun, fun, \ fun, fun, fun, fun, \ fun, \ \ {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, fun} \ }, \ \ { \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, \ fun, fun, fun, fun, \ fun, \ NULL}, \ \ {fun, fun, fun, fun, \ fun, fun, fun, fun, \ fun, \ \ { \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ 
\ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, \ fun, fun, fun, fun, \ fun, \ NULL}, \ \ {fun, fun, fun, fun, \ fun, fun, fun, fun, \ fun, \ NULL}, \ \ {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, fun} \ }, \ \ { \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, \ fun, fun, fun, fun, \ fun, \ NULL}, \ \ {fun, fun, fun, fun, \ fun, fun, fun, fun, \ fun, \ NULL}, \ \ {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, fun} \ }, \ \ { \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, fun, \ fun, fun, fun, fun, \ NULL}, \ \ {fun, fun, fun, fun, \ fun, fun, fun, fun, \ fun, \ NULL}, \ \ {fun, fun, fun, fun, \ fun, fun, fun, fun, \ fun, \ NULL}, \ \ {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, fun} \ }, \ \ { \ {fun, fun, fun, fun, \ fun, fun, fun, fun, \ fun, \ NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, 
fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, fun} \ }, \ {{fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, fun}}, \ {{fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, fun}}, \ {{fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ 
{fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, fun}}, \ {{fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, fun}}, \ {{fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {fun, fun, fun, fun, fun, fun, fun, fun, fun, NULL}, \ {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, fun} \ } \ }; /* * Defines a static array that holds function pointers to an elementwise op, * itype, dtype templated versions of the specified function. */ #define OP_ITYPE_DTYPE_TEMPLATE_TABLE(fun, ret, ...) NAMED_OP_ITYPE_DTYPE_TEMPLATE_TABLE(ttable, fun, ret, __VA_ARGS__) #define NAMED_OP_ITYPE_DTYPE_TEMPLATE_TABLE(name, fun, ret, ...) 
\ static ret (*(name)[nm::NUM_EWOPS][nm::NUM_ITYPES][nm::NUM_DTYPES])(__VA_ARGS__) = \ {{{fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun}},\ {{fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun}},\ {{fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun}},\ {{fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun}},\ {{fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun}},\ {{fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun}},\ {{fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun}},\ {{fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun}},\ {{fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun}},\ {{fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun}},\ {{fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ 
{fun,fun,fun,fun,fun,fun,fun,fun,fun,fun},\ {fun,fun,fun,fun,fun,fun,fun,fun,fun,fun}}}; extern "C" { /* * Data */ // regular data types extern const char* const DTYPE_NAMES[nm::NUM_DTYPES]; extern const size_t DTYPE_SIZES[nm::NUM_DTYPES]; extern const nm::dtype_t Upcast[nm::NUM_DTYPES][nm::NUM_DTYPES]; /* * Functions */ void* rubyobj_to_cval(VALUE val, nm::dtype_t dtype); void rubyval_to_cval(VALUE val, nm::dtype_t dtype, void* loc); void nm_init_data(); } // end of extern "C" block #endif // DATA_H ================================================ FILE: ext/nmatrix/data/meta.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == meta.h // // Header file for dealing with template metaprogramming. 
#ifndef META_H # define META_H namespace nm { /* * Template Metaprogramming */ template struct ctype_to_dtype_enum { static const nm::dtype_t value_type = nm::BYTE; }; template <> struct ctype_to_dtype_enum { static const nm::dtype_t value_type = nm::BYTE; }; template <> struct ctype_to_dtype_enum { static const nm::dtype_t value_type = nm::INT8; }; template <> struct ctype_to_dtype_enum { static const nm::dtype_t value_type = nm::INT16; }; template <> struct ctype_to_dtype_enum { static const nm::dtype_t value_type = nm::INT32; }; template <> struct ctype_to_dtype_enum { static const nm::dtype_t value_type = nm::INT64; }; template <> struct ctype_to_dtype_enum { static const nm::dtype_t value_type = nm::FLOAT32; }; template <> struct ctype_to_dtype_enum { static const nm::dtype_t value_type = nm::FLOAT64; }; template <> struct ctype_to_dtype_enum { static const nm::dtype_t value_type = nm::COMPLEX64; }; template <> struct ctype_to_dtype_enum { static const nm::dtype_t value_type = nm::COMPLEX128; }; template <> struct ctype_to_dtype_enum { static const nm::dtype_t value_type = nm::RUBYOBJ; }; template struct dtype_enum_T; template <> struct dtype_enum_T { typedef uint8_t type; }; template <> struct dtype_enum_T { typedef int8_t type; }; template <> struct dtype_enum_T { typedef int16_t type; }; template <> struct dtype_enum_T { typedef int32_t type; }; template <> struct dtype_enum_T { typedef int64_t type; }; template <> struct dtype_enum_T { typedef float type; }; template <> struct dtype_enum_T { typedef double type; }; template <> struct dtype_enum_T { typedef nm::Complex64 type; }; template <> struct dtype_enum_T { typedef nm::Complex128 type; }; template <> struct dtype_enum_T { typedef nm::RubyObject type; }; } // end namespace nm #endif ================================================ FILE: ext/nmatrix/data/ruby_object.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // 
A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == ruby_object.h // // Functions and classes for dealing with Ruby objects. #ifndef RUBY_OBJECT_H #define RUBY_OBJECT_H /* * Standard Includes */ #include #include #include /* * Project Includes */ #include "ruby_constants.h" /* * Macros */ #define NM_RUBYVAL_IS_NUMERIC(val) (FIXNUM_P(val) or RB_FLOAT_TYPE_P(val) or RB_TYPE_P(val, T_COMPLEX)) /* * Classes and Functions */ namespace nm { template struct made_from_same_template : std::false_type {}; template class Templ, typename Arg1, typename Arg2> struct made_from_same_template, Templ > : std::true_type {}; class RubyObject { public: VALUE rval; /* * Value constructor. */ inline RubyObject(VALUE ref = Qnil) : rval(ref) {} /* * Complex number constructor. */ template ::value>::type> inline RubyObject(const Complex& other) : rval(rb_complex_new(rb_float_new(other.r), rb_float_new(other.i))) {} /* * Integer constructor. * * Does not work as a template. */ inline RubyObject(uint8_t other) : rval(INT2FIX(other)) {} inline RubyObject(int8_t other) : rval(INT2FIX(other)) {} inline RubyObject(int16_t other) : rval(INT2FIX(other)) {} inline RubyObject(uint16_t other) : rval(INT2FIX(other)) {} inline RubyObject(int32_t other) : rval(INT2FIX(other)) {} // there is no uint32_t here because that's a Ruby VALUE type, and we need the compiler to treat that as a VALUE. 
inline RubyObject(int64_t other) : rval(INT2FIX(other)) {} // inline RubyObject(uint64_t other) : rval(INT2FIX(other)) {} /* * Float constructor. * * Does not work as a template. */ inline RubyObject(float other) : rval(rb_float_new(other)) {} inline RubyObject(double other) : rval(rb_float_new(other)) {} /* * Operators for converting RubyObjects to other C types. */ #define RETURN_OBJ2NUM(mac) if (this->rval == Qtrue) return 1; else if (this->rval == Qfalse) return 0; else return mac(this->rval); inline operator int8_t() const { RETURN_OBJ2NUM(NUM2INT) } inline operator uint8_t() const { RETURN_OBJ2NUM(NUM2UINT) } inline operator int16_t() const { RETURN_OBJ2NUM(NUM2INT) } inline operator uint16_t() const { RETURN_OBJ2NUM(NUM2UINT) } inline operator int32_t() const { RETURN_OBJ2NUM(NUM2LONG) } inline operator VALUE() const { return rval; } //inline operator uint32_t() const { return NUM2ULONG(this->rval); } inline operator int64_t() const { RETURN_OBJ2NUM(NUM2LONG) } //inline operator uint64_t() const { RETURN_OBJ2NUM(NUM2ULONG) } inline operator double() const { RETURN_OBJ2NUM(NUM2DBL) } inline operator float() const { RETURN_OBJ2NUM(NUM2DBL) } inline operator Complex64() const { return this->to(); } inline operator Complex128() const { return this->to(); } /* * Copy constructors. */ inline RubyObject(const RubyObject& other) : rval(other.rval) {} /* * Inverse operator. */ inline RubyObject inverse() const { rb_raise(rb_eNotImpError, "RubyObject#inverse needs to be implemented"); } /* * Absolute value. */ inline RubyObject abs() const { return RubyObject(rb_funcall(this->rval, rb_intern("abs"), 0)); } /* * Binary operator definitions. 
*/ inline RubyObject operator+(const RubyObject& other) const { return RubyObject(rb_funcall(this->rval, nm_rb_add, 1, other.rval)); } inline RubyObject& operator+=(const RubyObject& other) { this->rval = rb_funcall(this->rval, nm_rb_add, 1, other.rval); return *this; } inline RubyObject operator-(const RubyObject& other) const { return RubyObject(rb_funcall(this->rval, nm_rb_sub, 1, other.rval)); } inline RubyObject& operator-=(const RubyObject& other) { this->rval = rb_funcall(this->rval, nm_rb_sub, 1, other.rval); return *this; } inline RubyObject operator*(const RubyObject& other) const { return RubyObject(rb_funcall(this->rval, nm_rb_mul, 1, other.rval)); } inline RubyObject& operator*=(const RubyObject& other) { this->rval = rb_funcall(this->rval, nm_rb_mul, 1, other.rval); return *this; } inline RubyObject operator/(const RubyObject& other) const { return RubyObject(rb_funcall(this->rval, nm_rb_div, 1, other.rval)); } inline RubyObject& operator/=(const RubyObject& other) { this->rval = rb_funcall(this->rval, nm_rb_div, 1, other.rval); return *this; } inline RubyObject operator%(const RubyObject& other) const { return RubyObject(rb_funcall(this->rval, nm_rb_percent, 1, other.rval)); } inline bool operator>(const RubyObject& other) const { return rb_funcall(this->rval, nm_rb_gt, 1, other.rval) == Qtrue; } inline bool operator<(const RubyObject& other) const { return rb_funcall(this->rval, nm_rb_lt, 1, other.rval) == Qtrue; } template inline bool operator<(const OtherType& other) const { return *this < RubyObject(other); } inline bool operator==(const RubyObject& other) const { return rb_funcall(this->rval, nm_rb_eql, 1, other.rval) == Qtrue; } template inline bool operator==(const OtherType& other) const { return *this == RubyObject(other); } inline bool operator!=(const RubyObject& other) const { return rb_funcall(this->rval, nm_rb_neql, 1, other.rval) == Qtrue; } template inline bool operator!=(const OtherType& other) const { return *this != 
RubyObject(other); } inline bool operator>=(const RubyObject& other) const { return rb_funcall(this->rval, nm_rb_gte, 1, other.rval) == Qtrue; } template inline bool operator>=(const OtherType& other) const { return *this >= RubyObject(other); } inline bool operator<=(const RubyObject& other) const { return rb_funcall(this->rval, nm_rb_lte, 1, other.rval) == Qtrue; } template inline bool operator<=(const OtherType& other) const { return *this <= RubyObject(other); } //////////////////////////// // RUBY-NATIVE OPERATIONS // //////////////////////////// /* template ::value>::type> inline bool operator==(const NativeType& other) const { return *this == RubyObject(other); } template ::value>::type> inline bool operator!=(const NativeType& other) const { return *this != RubyObject(other); } */ ////////////////////////////// // RUBY-COMPLEX OPERATIONS // ////////////////////////////// template ::value>::type> inline bool operator==(const Complex& other) const { return *this == RubyObject(other); } template ::value>::type> inline bool operator!=(const Complex& other) const { return *this != RubyObject(other); } /* * Convert a Ruby object to an integer. */ template inline typename std::enable_if::value, IntType>::type to(void) { return NUM2INT(this->rval); } /* * Convert a Ruby object to a floating point number. */ template inline typename std::enable_if::value, FloatType>::type to(void) { return NUM2DBL(this->rval); } /* * Convert a Ruby object to a complex number. 
*/ template inline typename std::enable_if::value, ComplexType>::type to(void) const { if (FIXNUM_P(this->rval) or TYPE(this->rval) == T_FLOAT) { return ComplexType(NUM2DBL(this->rval)); } else if (TYPE(this->rval) == T_COMPLEX) { return ComplexType(NUM2DBL(rb_funcall(this->rval, nm_rb_real, 0)), NUM2DBL(rb_funcall(this->rval, nm_rb_imag, 0))); } else { rb_raise(rb_eTypeError, "Invalid conversion to Complex type."); } } }; // Negative operator inline RubyObject operator-(const RubyObject& rhs) { return RubyObject(rb_funcall(rhs.rval, nm_rb_negate, 0)); } //////////////////////////// // NATIVE-RUBY OPERATIONS // //////////////////////////// template ::value>::type> inline RubyObject operator/(const NativeType left, const RubyObject& right) { return RubyObject(left) / right; } template ::value>::type> inline bool operator==(const NativeType left, const RubyObject& right) { return RubyObject(left) == right; } template ::value>::type> inline bool operator!=(const NativeType left, const RubyObject& right) { return RubyObject(left) != right; } template ::value>::type> inline bool operator<=(const NativeType left, const RubyObject& right) { return RubyObject(left) <= right; } template ::value>::type> inline bool operator>=(const NativeType left, const RubyObject& right) { return RubyObject(left) >= right; } template ::value>::type> inline bool operator<(const NativeType left, const RubyObject& right) { return RubyObject(left) < right; } template ::value>::type> inline bool operator>(const NativeType left, const RubyObject& right) { return RubyObject(left) > right; } ///////////////////////////// // COMPLEX-RUBY OPERATIONS // ///////////////////////////// template ::value>::type> inline bool operator==(const Complex& left, const RubyObject& right) { return RubyObject(left) == right; } template ::value>::type> inline bool operator!=(const Complex& left, const RubyObject& right) { return RubyObject(left) != right; } template ::value>::type> inline bool operator<=(const 
Complex& left, const RubyObject& right) { return RubyObject(left) <= right; } template ::value>::type> inline bool operator>=(const Complex& left, const RubyObject& right) { return RubyObject(left) >= right; } template ::value>::type> inline bool operator<(const Complex& left, const RubyObject& right) { return RubyObject(left) < right; } template ::value>::type> inline bool operator>(const Complex& left, const RubyObject& right) { return RubyObject(left) > right; } } // end of namespace nm namespace std { inline nm::RubyObject abs(const nm::RubyObject& obj) { return obj.abs(); } inline nm::RubyObject sqrt(const nm::RubyObject& obj) { VALUE cMath = rb_const_get(rb_cObject, rb_intern("Math")); return nm::RubyObject(rb_funcall(cMath, rb_intern("sqrt"), 1, obj.rval)); } } #endif // RUBY_OBJECT_H ================================================ FILE: ext/nmatrix/extconf.rb ================================================ # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == extconf.rb # # This file checks for ATLAS and other necessary headers, and # generates a Makefile for compiling NMatrix. 
require File.expand_path("../../../lib/nmatrix/mkmf", __FILE__) $INSTALLFILES = [ ['nmatrix.h' , '$(archdir)'], ['nmatrix.hpp' , '$(archdir)'], ['nmatrix_config.h', '$(archdir)'], ['nm_memory.h' , '$(archdir)'], ['ruby_constants.h', '$(archdir)'] ] if /cygwin|mingw/ =~ RUBY_PLATFORM $INSTALLFILES << ['libnmatrix.a', '$(archdir)'] end $DEBUG = true $CFLAGS = ["-Wall -Werror=return-type",$CFLAGS].join(" ") $CXXFLAGS = ["-Wall -Werror=return-type",$CXXFLAGS].join(" ") $CPPFLAGS = ["-Wall -Werror=return-type",$CPPFLAGS].join(" ") # When adding objects here, make sure their directories are included in CLEANOBJS down at the bottom of extconf.rb. basenames = %w{nmatrix ruby_constants data/data util/io math util/sl_list storage/common storage/storage storage/dense/dense storage/yale/yale storage/list/list} $objs = basenames.map { |b| "#{b}.o" } $srcs = basenames.map { |b| "#{b}.cpp" } #$libs += " -lprofiler " create_conf_h("nmatrix_config.h") create_makefile("nmatrix") Dir.mkdir("data") unless Dir.exists?("data") Dir.mkdir("util") unless Dir.exists?("util") Dir.mkdir("storage") unless Dir.exists?("storage") Dir.chdir("storage") do Dir.mkdir("yale") unless Dir.exists?("yale") Dir.mkdir("list") unless Dir.exists?("list") Dir.mkdir("dense") unless Dir.exists?("dense") end # to clean up object files in subdirectories: open('Makefile', 'a') do |f| clean_objs_paths = %w{data storage storage/dense storage/yale storage/list util}.map { |d| "#{d}/*.#{CONFIG["OBJEXT"]}" } f.write("CLEANOBJS := $(CLEANOBJS) #{clean_objs_paths.join(' ')}") end ================================================ FILE: ext/nmatrix/math/asum.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. 
// // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - present, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - present, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == asum.h // // CBLAS asum function // /* * Automatically Tuned Linear Algebra Software v3.8.4 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the ATLAS group or the names of its contributers may * not be used to endorse or promote products derived from this * software without specific written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */ #ifndef ASUM_H #define ASUM_H #include "math/magnitude.h" namespace nm { namespace math { /* * Level 1 BLAS routine which sums the absolute values of a vector's contents. If the vector consists of complex values, * the routine sums the absolute values of the real and imaginary components as well. * * So, based on input types, these are the valid return types: * int -> int * float -> float or double * double -> double * complex64 -> float or double * complex128 -> double */ template ::type> inline MDType asum(const int N, const DType* X, const int incX) { MDType sum = 0; if ((N > 0) && (incX > 0)) { for (int i = 0; i < N; ++i) { sum += magnitude(X[i*incX]); } } return sum; } template ::type> inline void cblas_asum(const int N, const void* X, const int incX, void* sum) { *reinterpret_cast( sum ) = asum( N, reinterpret_cast(X), incX ); } }} // end of namespace nm::math #endif // ASUM_H ================================================ FILE: ext/nmatrix/math/cblas_enums.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. 
// // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2015, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2015, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == cblas_enums.h // // CBLAS definitions for when CBLAS is not available. // #ifndef CBLAS_ENUM_DEFINED_H #define CBLAS_ENUM_DEFINED_H enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113}; enum CBLAS_UPLO {CblasUpper=121, CblasLower=122}; enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132}; enum CBLAS_SIDE {CblasLeft=141, CblasRight=142}; #endif ================================================ FILE: ext/nmatrix/math/cblas_templates_core.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. 
// // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == cblas_templates_core.h // // This header files is not used by the main nmatrix gem but has to be stored // in this directory so that it can be shared between nmatrix-atlas and // nmatrix-lapack. // //This is not a normal header file so we don't use an include guard. //See ext/nmatrix_atlas/math_atlas/cblas_templates_atlas.h for how //to use. //Below are the BLAS functions for which we have internal implementations. //The internal implementations are defined in the ext/nmatrix/math directory //and are the non-specialized //forms of the template functions nm::math::whatever(). //They are are called below for non-BLAS //types in the non-specialized form of the template nm::math::something_else::whatever(). //The specialized forms call the appropriate cblas functions. //For all functions besides herk, we also define the cblas_whatever() template //functions below, which just cast //their arguments to the appropriate types. //rotg template inline void rotg(DType* a, DType* b, DType* c, DType* s) { nm::math::rotg(a, b, c, s); } template <> inline void rotg(float* a, float* b, float* c, float* s) { cblas_srotg(a, b, c, s); } template <> inline void rotg(double* a, double* b, double* c, double* s) { cblas_drotg(a, b, c, s); } //Complex versions of rot and rotg are available in the ATLAS (and Intel) //version of CBLAS, but not part //of the reference implementation or OpenBLAS, so we omit them here //and fall back to the generic internal implementation. //Another options would be to directly call the fortran functions, e.g. ZROTG, //which for some reason are a part of the standard. //We can still define complex specializations of these functions in an ATLAS-specific //header. 
template inline void cblas_rotg(void* a, void* b, void* c, void* s) { rotg(static_cast(a), static_cast(b), static_cast(c), static_cast(s)); } //rot template inline void rot(const int N, DType* X, const int incX, DType* Y, const int incY, const CSDType c, const CSDType s) { nm::math::rot(N, X, incX, Y, incY, c, s); } template <> inline void rot(const int N, float* X, const int incX, float* Y, const int incY, const float c, const float s) { cblas_srot(N, X, incX, Y, incY, (float)c, (float)s); } template <> inline void rot(const int N, double* X, const int incX, double* Y, const int incY, const double c, const double s) { cblas_drot(N, X, incX, Y, incY, c, s); } template inline void cblas_rot(const int N, void* X, const int incX, void* Y, const int incY, const void* c, const void* s) { rot(N, static_cast(X), incX, static_cast(Y), incY, *static_cast(c), *static_cast(s)); } /* * Level 1 BLAS routine which sums the absolute values of a vector's contents. If the vector consists of complex values, * the routine sums the absolute values of the real and imaginary components as well. 
* * So, based on input types, these are the valid return types: * int -> int * float -> float or double * double -> double * complex64 -> float or double * complex128 -> double */ template ::type> inline MDType asum(const int N, const DType* X, const int incX) { return nm::math::asum(N,X,incX); } template <> inline float asum(const int N, const float* X, const int incX) { return cblas_sasum(N, X, incX); } template <> inline double asum(const int N, const double* X, const int incX) { return cblas_dasum(N, X, incX); } template <> inline float asum(const int N, const Complex64* X, const int incX) { return cblas_scasum(N, X, incX); } template <> inline double asum(const int N, const Complex128* X, const int incX) { return cblas_dzasum(N, X, incX); } template ::type> inline void cblas_asum(const int N, const void* X, const int incX, void* sum) { *static_cast( sum ) = asum( N, static_cast(X), incX ); } /* * Level 1 BLAS routine which returns the 2-norm of an n-vector x. # * Based on input types, these are the valid return types: * int -> int * float -> float or double * double -> double * complex64 -> float or double * complex128 -> double */ template ::type> inline MDType nrm2(const int N, const DType* X, const int incX) { return nm::math::nrm2(N, X, incX); } template <> inline float nrm2(const int N, const float* X, const int incX) { return cblas_snrm2(N, X, incX); } template <> inline double nrm2(const int N, const double* X, const int incX) { return cblas_dnrm2(N, X, incX); } template <> inline float nrm2(const int N, const Complex64* X, const int incX) { return cblas_scnrm2(N, X, incX); } template <> inline double nrm2(const int N, const Complex128* X, const int incX) { return cblas_dznrm2(N, X, incX); } template ::type> inline void cblas_nrm2(const int N, const void* X, const int incX, void* result) { *static_cast( result ) = nrm2( N, static_cast(X), incX ); } //imax template inline int imax(const int n, const DType *x, const int incx) { return nm::math::imax(n, x, 
incx); } template<> inline int imax(const int n, const float* x, const int incx) { return cblas_isamax(n, x, incx); } template<> inline int imax(const int n, const double* x, const int incx) { return cblas_idamax(n, x, incx); } template<> inline int imax(const int n, const Complex64* x, const int incx) { return cblas_icamax(n, x, incx); } template <> inline int imax(const int n, const Complex128* x, const int incx) { return cblas_izamax(n, x, incx); } template inline int cblas_imax(const int n, const void* x, const int incx) { return imax(n, static_cast(x), incx); } //scal template inline void scal(const int n, const DType scalar, DType* x, const int incx) { nm::math::scal(n, scalar, x, incx); } template <> inline void scal(const int n, const float scalar, float* x, const int incx) { cblas_sscal(n, scalar, x, incx); } template <> inline void scal(const int n, const double scalar, double* x, const int incx) { cblas_dscal(n, scalar, x, incx); } template <> inline void scal(const int n, const Complex64 scalar, Complex64* x, const int incx) { cblas_cscal(n, &scalar, x, incx); } template <> inline void scal(const int n, const Complex128 scalar, Complex128* x, const int incx) { cblas_zscal(n, &scalar, x, incx); } template inline void cblas_scal(const int n, const void* scalar, void* x, const int incx) { scal(n, *static_cast(scalar), static_cast(x), incx); } //gemv template inline bool gemv(const enum CBLAS_TRANSPOSE Trans, const int M, const int N, const DType* alpha, const DType* A, const int lda, const DType* X, const int incX, const DType* beta, DType* Y, const int incY) { return nm::math::gemv(Trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); } template <> inline bool gemv(const enum CBLAS_TRANSPOSE Trans, const int M, const int N, const float* alpha, const float* A, const int lda, const float* X, const int incX, const float* beta, float* Y, const int incY) { cblas_sgemv(CblasRowMajor, Trans, M, N, *alpha, A, lda, X, incX, *beta, Y, incY); return true; } template 
<> inline bool gemv(const enum CBLAS_TRANSPOSE Trans, const int M, const int N, const double* alpha, const double* A, const int lda, const double* X, const int incX, const double* beta, double* Y, const int incY) { cblas_dgemv(CblasRowMajor, Trans, M, N, *alpha, A, lda, X, incX, *beta, Y, incY); return true; } template <> inline bool gemv(const enum CBLAS_TRANSPOSE Trans, const int M, const int N, const Complex64* alpha, const Complex64* A, const int lda, const Complex64* X, const int incX, const Complex64* beta, Complex64* Y, const int incY) { cblas_cgemv(CblasRowMajor, Trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); return true; } template <> inline bool gemv(const enum CBLAS_TRANSPOSE Trans, const int M, const int N, const Complex128* alpha, const Complex128* A, const int lda, const Complex128* X, const int incX, const Complex128* beta, Complex128* Y, const int incY) { cblas_zgemv(CblasRowMajor, Trans, M, N, alpha, A, lda, X, incX, beta, Y, incY); return true; } template inline static bool cblas_gemv(const enum CBLAS_TRANSPOSE trans, const int m, const int n, const void* alpha, const void* a, const int lda, const void* x, const int incx, const void* beta, void* y, const int incy) { return gemv(trans, m, n, static_cast(alpha), static_cast(a), lda, static_cast(x), incx, static_cast(beta), static_cast(y), incy); } //gemm template inline void gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const DType* alpha, const DType* A, const int lda, const DType* B, const int ldb, const DType* beta, DType* C, const int ldc) { nm::math::gemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); } template <> inline void gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float* alpha, const float* A, const int lda, const float* B, const int ldb, const float* beta, 
float* C, const int ldc) { cblas_sgemm(Order, TransA, TransB, M, N, K, *alpha, A, lda, B, ldb, *beta, C, ldc); } template <> inline void gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double* alpha, const double* A, const int lda, const double* B, const int ldb, const double* beta, double* C, const int ldc) { cblas_dgemm(Order, TransA, TransB, M, N, K, *alpha, A, lda, B, ldb, *beta, C, ldc); } template <> inline void gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const Complex64* alpha, const Complex64* A, const int lda, const Complex64* B, const int ldb, const Complex64* beta, Complex64* C, const int ldc) { cblas_cgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); } template <> inline void gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const Complex128* alpha, const Complex128* A, const int lda, const Complex128* B, const int ldb, const Complex128* beta, Complex128* C, const int ldc) { cblas_zgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); } template inline static void cblas_gemm(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, int m, int n, int k, void* alpha, void* a, int lda, void* b, int ldb, void* beta, void* c, int ldc) { gemm(order, trans_a, trans_b, m, n, k, static_cast(alpha), static_cast(a), lda, static_cast(b), ldb, static_cast(beta), static_cast(c), ldc); } //trsm template ::value>::type> inline void trsm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag, const int m, const int n, const DType alpha, const DType* a, const int lda, DType* b, const int ldb) { 
nm::math::trsm(order, side, uplo, trans_a, diag, m, n, alpha, a, lda, b, ldb); } template <> inline void trsm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag, const int m, const int n, const float alpha, const float* a, const int lda, float* b, const int ldb) { cblas_strsm(order, side, uplo, trans_a, diag, m, n, alpha, a, lda, b, ldb); } template <> inline void trsm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag, const int m, const int n, const double alpha, const double* a, const int lda, double* b, const int ldb) { cblas_dtrsm(order, side, uplo, trans_a, diag, m, n, alpha, a, lda, b, ldb); } template <> inline void trsm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag, const int m, const int n, const Complex64 alpha, const Complex64* a, const int lda, Complex64* b, const int ldb) { cblas_ctrsm(order, side, uplo, trans_a, diag, m, n, &alpha, a, lda, b, ldb); } template <> inline void trsm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag, const int m, const int n, const Complex128 alpha, const Complex128* a, const int lda, Complex128* b, const int ldb) { cblas_ztrsm(order, side, uplo, trans_a, diag, m, n, &alpha, a, lda, b, ldb); } template inline static void cblas_trsm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag, const int m, const int n, const void* alpha, const void* a, const int lda, void* b, const int ldb) { trsm(order, side, uplo, trans_a, diag, m, n, *static_cast(alpha), static_cast(a), lda, static_cast(b), ldb); } //Below are BLAS functions that we don't have 
an internal implementation for. //In this case the non-specialized form just raises an error. //syrk template inline void syrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const DType* alpha, const DType* A, const int lda, const DType* beta, DType* C, const int ldc) { rb_raise(rb_eNotImpError, "syrk not yet implemented for non-BLAS dtypes"); } template <> inline void syrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float* alpha, const float* A, const int lda, const float* beta, float* C, const int ldc) { cblas_ssyrk(Order, Uplo, Trans, N, K, *alpha, A, lda, *beta, C, ldc); } template <> inline void syrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double* alpha, const double* A, const int lda, const double* beta, double* C, const int ldc) { cblas_dsyrk(Order, Uplo, Trans, N, K, *alpha, A, lda, *beta, C, ldc); } template <> inline void syrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const Complex64* alpha, const Complex64* A, const int lda, const Complex64* beta, Complex64* C, const int ldc) { cblas_csyrk(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc); } template <> inline void syrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const Complex128* alpha, const Complex128* A, const int lda, const Complex128* beta, Complex128* C, const int ldc) { cblas_zsyrk(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc); } template inline static void cblas_syrk(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE trans, const int n, const int k, const void* alpha, const void* A, const int lda, const void* beta, void* C, const int ldc) { syrk(order, uplo, trans, n, k, 
static_cast(alpha), static_cast(A), lda, static_cast(beta), static_cast(C), ldc); } //herk template inline void herk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const DType* alpha, const DType* A, const int lda, const DType* beta, DType* C, const int ldc) { rb_raise(rb_eNotImpError, "herk not yet implemented for non-BLAS dtypes"); } template <> inline void herk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const Complex64* alpha, const Complex64* A, const int lda, const Complex64* beta, Complex64* C, const int ldc) { cblas_cherk(Order, Uplo, Trans, N, K, alpha->r, A, lda, beta->r, C, ldc); } template <> inline void herk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const Complex128* alpha, const Complex128* A, const int lda, const Complex128* beta, Complex128* C, const int ldc) { cblas_zherk(Order, Uplo, Trans, N, K, alpha->r, A, lda, beta->r, C, ldc); } //trmm template inline void trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const DType* alpha, const DType* A, const int lda, DType* B, const int ldb) { rb_raise(rb_eNotImpError, "trmm not yet implemented for non-BLAS dtypes"); } template <> inline void trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const float* alpha, const float* A, const int lda, float* B, const int ldb) { cblas_strmm(order, side, uplo, ta, diag, m, n, *alpha, A, lda, B, ldb); } template <> inline void trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, 
const double* alpha, const double* A, const int lda, double* B, const int ldb) { cblas_dtrmm(order, side, uplo, ta, diag, m, n, *alpha, A, lda, B, ldb); } template <> inline void trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const Complex64* alpha, const Complex64* A, const int lda, Complex64* B, const int ldb) { cblas_ctrmm(order, side, uplo, ta, diag, m, n, alpha, A, lda, B, ldb); } template <> inline void trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const Complex128* alpha, const Complex128* A, const int lda, Complex128* B, const int ldb) { cblas_ztrmm(order, side, uplo, ta, diag, m, n, alpha, A, lda, B, ldb); } template inline static void cblas_trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const void* alpha, const void* A, const int lda, void* B, const int ldb) { trmm(order, side, uplo, ta, diag, m, n, static_cast(alpha), static_cast(A), lda, static_cast(B), ldb); } ================================================ FILE: ext/nmatrix/math/gemm.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. 
// // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == gemm.h // // Header file for interface with ATLAS's CBLAS gemm functions and // native templated version of LAPACK's gemm function. // #ifndef GEMM_H # define GEMM_H #include "cblas_enums.h" #include "math/long_dtype.h" namespace nm { namespace math { /* * GEneral Matrix Multiplication: based on dgemm.f from Netlib. * * This is an extremely inefficient algorithm. Recommend using ATLAS' version instead. * * Template parameters: LT -- long version of type T. Type T is the matrix dtype. * * This version throws no errors. Use gemm instead for error checking. */ template inline void gemm_nothrow(const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const DType* alpha, const DType* A, const int lda, const DType* B, const int ldb, const DType* beta, DType* C, const int ldc) { typename LongDType::type temp; // Quick return if possible if (!M or !N or ((*alpha == 0 or !K) and *beta == 1)) return; // For alpha = 0 if (*alpha == 0) { if (*beta == 0) { for (int j = 0; j < N; ++j) for (int i = 0; i < M; ++i) { C[i+j*ldc] = 0; } } else { for (int j = 0; j < N; ++j) for (int i = 0; i < M; ++i) { C[i+j*ldc] *= *beta; } } return; } // Start the operations if (TransB == CblasNoTrans) { if (TransA == CblasNoTrans) { // C = alpha*A*B+beta*C for (int j = 0; j < N; ++j) { if (*beta == 0) { for (int i = 0; i < M; ++i) { C[i+j*ldc] = 0; } } else if (*beta != 1) { for (int i = 0; i < M; ++i) { C[i+j*ldc] *= *beta; } } for (int l = 0; l < K; ++l) { if (B[l+j*ldb] != 0) { temp = *alpha * B[l+j*ldb]; for (int i = 0; i < M; ++i) { C[i+j*ldc] += A[i+l*lda] * temp; } } } } } else { // C = alpha*A**DType*B + beta*C for (int j = 0; j < N; ++j) { for (int i = 0; i < M; ++i) { temp = 0; for (int l = 0; l < K; ++l) { temp += A[l+i*lda] * 
B[l+j*ldb]; } if (*beta == 0) { C[i+j*ldc] = *alpha*temp; } else { C[i+j*ldc] = *alpha*temp + *beta*C[i+j*ldc]; } } } } } else if (TransA == CblasNoTrans) { // C = alpha*A*B**T + beta*C for (int j = 0; j < N; ++j) { if (*beta == 0) { for (int i = 0; i < M; ++i) { C[i+j*ldc] = 0; } } else if (*beta != 1) { for (int i = 0; i < M; ++i) { C[i+j*ldc] *= *beta; } } for (int l = 0; l < K; ++l) { if (B[j+l*ldb] != 0) { temp = *alpha * B[j+l*ldb]; for (int i = 0; i < M; ++i) { C[i+j*ldc] += A[i+l*lda] * temp; } } } } } else { // C = alpha*A**DType*B**T + beta*C for (int j = 0; j < N; ++j) { for (int i = 0; i < M; ++i) { temp = 0; for (int l = 0; l < K; ++l) { temp += A[l+i*lda] * B[j+l*ldb]; } if (*beta == 0) { C[i+j*ldc] = *alpha*temp; } else { C[i+j*ldc] = *alpha*temp + *beta*C[i+j*ldc]; } } } } return; } template inline void gemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const DType* alpha, const DType* A, const int lda, const DType* B, const int ldb, const DType* beta, DType* C, const int ldc) { if (Order == CblasRowMajor) { if (TransA == CblasNoTrans) { if (lda < std::max(K,1)) { rb_raise(rb_eArgError, "lda must be >= MAX(K,1): lda=%d K=%d", lda, K); } } else { if (lda < std::max(M,1)) { // && TransA == CblasTrans rb_raise(rb_eArgError, "lda must be >= MAX(M,1): lda=%d M=%d", lda, M); } } if (TransB == CblasNoTrans) { if (ldb < std::max(N,1)) { rb_raise(rb_eArgError, "ldb must be >= MAX(N,1): ldb=%d N=%d", ldb, N); } } else { if (ldb < std::max(K,1)) { rb_raise(rb_eArgError, "ldb must be >= MAX(K,1): ldb=%d K=%d", ldb, K); } } if (ldc < std::max(N,1)) { rb_raise(rb_eArgError, "ldc must be >= MAX(N,1): ldc=%d N=%d", ldc, N); } } else { // CblasColMajor if (TransA == CblasNoTrans) { if (lda < std::max(M,1)) { rb_raise(rb_eArgError, "lda must be >= MAX(M,1): lda=%d M=%d", lda, M); } } else { if (lda < std::max(K,1)) { // && TransA == CblasTrans rb_raise(rb_eArgError, "lda must 
be >= MAX(K,1): lda=%d K=%d", lda, K); } } if (TransB == CblasNoTrans) { if (ldb < std::max(K,1)) { rb_raise(rb_eArgError, "ldb must be >= MAX(K,1): ldb=%d N=%d", ldb, K); } } else { if (ldb < std::max(N,1)) { // NOTE: This error message is actually wrong in the ATLAS source currently. Or are we wrong? rb_raise(rb_eArgError, "ldb must be >= MAX(N,1): ldb=%d N=%d", ldb, N); } } if (ldc < std::max(M,1)) { rb_raise(rb_eArgError, "ldc must be >= MAX(M,1): ldc=%d N=%d", ldc, M); } } /* * Call SYRK when that's what the user is actually asking for; just handle beta=0, because beta=X requires * we copy C and then subtract to preserve asymmetry. */ if (A == B && M == N && TransA != TransB && lda == ldb && beta == 0) { rb_raise(rb_eNotImpError, "syrk and syreflect not implemented"); /*syrk(CblasUpper, (Order == CblasColMajor) ? TransA : TransB, N, K, alpha, A, lda, beta, C, ldc); syreflect(CblasUpper, N, C, ldc); */ } if (Order == CblasRowMajor) gemm_nothrow(TransB, TransA, N, M, K, alpha, B, ldb, A, lda, beta, C, ldc); else gemm_nothrow(TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); } }} // end of namespace nm::math #endif // GEMM_H ================================================ FILE: ext/nmatrix/math/gemv.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. 
// // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == gemv.h // // Header file for interface with ATLAS's CBLAS gemv functions and // native templated version of LAPACK's gemv function. // #ifndef GEMV_H # define GEMV_H #include "math/long_dtype.h" namespace nm { namespace math { /* * GEneral Matrix-Vector multiplication: based on dgemv.f from Netlib. * * This is an extremely inefficient algorithm. Recommend using ATLAS' version instead. * * Template parameters: LT -- long version of type T. Type T is the matrix dtype. */ template inline bool gemv(const enum CBLAS_TRANSPOSE Trans, const int M, const int N, const DType* alpha, const DType* A, const int lda, const DType* X, const int incX, const DType* beta, DType* Y, const int incY) { int lenX, lenY, i, j; int kx, ky, iy, jx, jy, ix; typename LongDType::type temp; // Test the input parameters if (Trans < 111 || Trans > 113) { rb_raise(rb_eArgError, "GEMV: TransA must be CblasNoTrans, CblasTrans, or CblasConjTrans"); return false; } else if (lda < std::max(1, N)) { fprintf(stderr, "GEMV: N = %d; got lda=%d", N, lda); rb_raise(rb_eArgError, "GEMV: Expected lda >= max(1, N)"); return false; } else if (incX == 0) { rb_raise(rb_eArgError, "GEMV: Expected incX != 0\n"); return false; } else if (incY == 0) { rb_raise(rb_eArgError, "GEMV: Expected incY != 0\n"); return false; } // Quick return if possible if (!M or !N or (*alpha == 0 and *beta == 1)) return true; if (Trans == CblasNoTrans) { lenX = N; lenY = M; } else { lenX = M; lenY = N; } if (incX > 0) kx = 0; else kx = (lenX - 1) * -incX; if (incY > 0) ky = 0; else ky = (lenY - 1) * -incY; // Start the operations. In this version, the elements of A are accessed sequentially with one pass through A. 
if (*beta != 1) { if (incY == 1) { if (*beta == 0) { for (i = 0; i < lenY; ++i) { Y[i] = 0; } } else { for (i = 0; i < lenY; ++i) { Y[i] *= *beta; } } } else { iy = ky; if (*beta == 0) { for (i = 0; i < lenY; ++i) { Y[iy] = 0; iy += incY; } } else { for (i = 0; i < lenY; ++i) { Y[iy] *= *beta; iy += incY; } } } } if (*alpha == 0) return false; if (Trans == CblasNoTrans) { // Form y := alpha*A*x + y. jx = kx; if (incY == 1) { for (j = 0; j < N; ++j) { if (X[jx] != 0) { temp = *alpha * X[jx]; for (i = 0; i < M; ++i) { Y[i] += A[j+i*lda] * temp; } } jx += incX; } } else { for (j = 0; j < N; ++j) { if (X[jx] != 0) { temp = *alpha * X[jx]; iy = ky; for (i = 0; i < M; ++i) { Y[iy] += A[j+i*lda] * temp; iy += incY; } } jx += incX; } } } else { // TODO: Check that indices are correct! They're switched for C. // Form y := alpha*A**DType*x + y. jy = ky; if (incX == 1) { for (j = 0; j < N; ++j) { temp = 0; for (i = 0; i < M; ++i) { temp += A[j+i*lda]*X[j]; } Y[jy] += *alpha * temp; jy += incY; } } else { for (j = 0; j < N; ++j) { temp = 0; ix = kx; for (i = 0; i < M; ++i) { temp += A[j+i*lda] * X[ix]; ix += incX; } Y[jy] += *alpha * temp; jy += incY; } } } return true; } // end of GEMV }} // end of namespace nm::math #endif // GEMM_H ================================================ FILE: ext/nmatrix/math/getrf.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - present, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - present, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. 
// // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == getrf.h // // getrf function in native C++. // /* * Automatically Tuned Linear Algebra Software v3.8.4 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the ATLAS group or the names of its contributers may * not be used to endorse or promote products derived from this * software without specific written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
 *
 */

#ifndef GETRF_H
#define GETRF_H

#include "math/laswp.h"
#include "math/math.h"
#include "math/trsm.h"
#include "math/gemm.h"
#include "math/imax.h"
#include "math/scal.h"

namespace nm { namespace math {

/* Numeric inverse -- usually just 1 / f, but a little more complicated for complex. */
// Generic case defers to the type's own inverse() method (e.g. complex/rational dtypes).
template inline DType numeric_inverse(const DType& n) {
  return n.inverse();
}
template <> inline float numeric_inverse(const float& n) { return 1 / n; }
template <> inline double numeric_inverse(const double& n) { return 1 / n; }

/*
 * Templated version of row-order and column-order getrf, derived from ATL_getrfR.c (from ATLAS 3.8.0).
 *
 * 1. Row-major factorization of form
 *      A = L * U * P
 *    where P is a column-permutation matrix, L is lower triangular (lower
 *    trapazoidal if M > N), and U is upper triangular with unit diagonals (upper
 *    trapazoidal if M < N). This is the recursive Level 3 BLAS version.
 *
 * 2. Column-major factorization of form
 *      A = P * L * U
 *    where P is a row-permutation matrix, L is lower triangular with unit diagonal
 *    elements (lower trapazoidal if M > N), and U is upper triangular (upper
 *    trapazoidal if M < N). This is the recursive Level 3 BLAS version.
 *
 * Template argument determines whether 1 or 2 is utilized.
 *
 * NOTE(review): the template parameter list (apparently <bool RowMajor, typename DType>,
 * given the names used in the body) seems to have been stripped during extraction;
 * the same applies to the explicit arguments of the recursive calls below.
 */
template inline int getrf_nothrow(const int M, const int N, DType* A, const int lda, int* ipiv) {
  const int MN = std::min(M, N);
  int ierr = 0;

  // Symbols used by ATLAS in the several versions of this function:
  // Row   Col    Us
  // Nup   Nleft  N_ul
  // Ndown Nright N_dr
  // We're going to use N_ul, N_dr

  DType neg_one = -1, one = 1;

  if (MN > 1) {
    // Split the factorization into an upper-left panel of N_ul rows/columns
    // and a remaining panel, factor the first recursively, update, then
    // factor the remainder (recursive Level-3 BLAS scheme).
    int N_ul = MN >> 1;

    // FIXME: Figure out how ATLAS #defines NB
#ifdef NB
    if (N_ul > NB) N_ul = ATL_MulByNB(ATL_DivByNB(N_ul));
#endif

    int N_dr;
    if (RowMajor) {
      N_dr = M - N_ul;   // rows remaining below the first panel
    } else {
      N_dr = N - N_ul;   // columns remaining right of the first panel
    }

    // Factor the first panel recursively.
    int i = RowMajor ? getrf_nothrow(N_ul, N, A, lda, ipiv) : getrf_nothrow(M, N_ul, A, lda, ipiv);

    if (i) if (!ierr) ierr = i;  // keep the FIRST nonzero error code only

    DType *Ar, *Ac, *An;
    if (RowMajor) {
      Ar = &(A[N_ul * lda]),  // untouched panel below the factored one
      Ac = &(A[N_ul]);        // columns right of the pivot block
      An = &(Ar[N_ul]);       // trailing submatrix to be factored next

      // Apply pivots, triangular-solve against the factored panel, then
      // update the trailing submatrix with a rank-N_ul product.
      nm::math::laswp(N_dr, Ar, lda, 0, N_ul, ipiv, 1);

      nm::math::trsm(CblasRowMajor, CblasRight, CblasUpper, CblasNoTrans, CblasUnit, N_dr, N_ul, one, A, lda, Ar, lda);

      nm::math::gemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, N_dr, N-N_ul, N_ul, &neg_one, Ar, lda, Ac, lda, &one, An, lda);

      i = getrf_nothrow(N_dr, N-N_ul, An, lda, ipiv+N_ul);
    } else {
      Ar = NULL;
      Ac = &(A[N_ul * lda]);
      An = &(Ac[N_ul]);

      nm::math::laswp(N_dr, Ac, lda, 0, N_ul, ipiv, 1);

      nm::math::trsm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, N_ul, N_dr, one, A, lda, Ac, lda);

      nm::math::gemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M-N_ul, N_dr, N_ul, &neg_one, &(A[N_ul]), lda, Ac, lda, &one, An, lda);

      i = getrf_nothrow(M-N_ul, N_dr, An, lda, ipiv+N_ul);
    }

    if (i) if (!ierr) ierr = N_ul + i;  // offset error index into the second panel

    // Pivot indices from the second recursive call are relative to that panel;
    // shift them to be relative to the whole matrix.
    for (i = N_ul; i != MN; i++) {
      ipiv[i] += N_ul;
    }

    nm::math::laswp(N_ul, A, lda, N_ul, MN, ipiv, 1);  /* apply pivots */

  } else if (MN == 1) { // there's another case for the colmajor version, but it doesn't seem to be necessary.

    // Base case: a single row (or column). Pick the largest-magnitude element
    // as pivot, scale by its inverse, and swap it into the leading position.
    int i;
    if (RowMajor) {
      i = *ipiv = nm::math::imax(N, A, 1); // cblas_iamax(N, A, 1);
    } else {
      i = *ipiv = nm::math::imax(M, A, 1);
    }

    DType tmp = A[i];
    if (tmp != 0) {

      nm::math::scal((RowMajor ? N : M), nm::math::numeric_inverse(tmp), A, 1);
      A[i] = *A;
      *A   = tmp;

    } else ierr = 1;  // exactly-zero pivot: matrix is singular

  }
  return(ierr);
}

/*
 * From ATLAS 3.8.0:
 *
 * Computes one of two LU factorizations based on the setting of the Order
 * parameter, as follows:
 * ----------------------------------------------------------------------------
 *                       Order == CblasColMajor
 * Column-major factorization of form
 *   A = P * L * U
 * where P is a row-permutation matrix, L is lower triangular with unit
 * diagonal elements (lower trapazoidal if M > N), and U is upper triangular
 * (upper trapazoidal if M < N).
* * ---------------------------------------------------------------------------- * Order == CblasRowMajor * Row-major factorization of form * A = P * L * U * where P is a column-permutation matrix, L is lower triangular (lower * trapazoidal if M > N), and U is upper triangular with unit diagonals (upper * trapazoidal if M < N). * * ============================================================================ * Let IERR be the return value of the function: * If IERR == 0, successful exit. * If (IERR < 0) the -IERR argument had an illegal value * If (IERR > 0 && Order == CblasColMajor) * U(i-1,i-1) is exactly zero. The factorization has been completed, * but the factor U is exactly singular, and division by zero will * occur if it is used to solve a system of equations. * If (IERR > 0 && Order == CblasRowMajor) * L(i-1,i-1) is exactly zero. The factorization has been completed, * but the factor L is exactly singular, and division by zero will * occur if it is used to solve a system of equations. */ template inline int getrf(const enum CBLAS_ORDER Order, const int M, const int N, DType* A, int lda, int* ipiv) { if (Order == CblasRowMajor) { if (lda < std::max(1,N)) { rb_raise(rb_eArgError, "GETRF: lda must be >= MAX(N,1): lda=%d N=%d", lda, N); return -6; } return getrf_nothrow(M, N, A, lda, ipiv); } else { if (lda < std::max(1,M)) { rb_raise(rb_eArgError, "GETRF: lda must be >= MAX(M,1): lda=%d M=%d", lda, M); return -6; } return getrf_nothrow(M, N, A, lda, ipiv); //rb_raise(rb_eNotImpError, "column major getrf not implemented"); } } /* * Function signature conversion for calling LAPACK's getrf functions as directly as possible. * * For documentation: http://www.netlib.org/lapack/double/dgetrf.f * * This function should normally go in math.cpp, but we need it to be available to nmatrix.cpp. 
*/ template inline int clapack_getrf(const enum CBLAS_ORDER order, const int m, const int n, void* a, const int lda, int* ipiv) { return getrf(order, m, n, reinterpret_cast(a), lda, ipiv); } } } // end nm::math #endif ================================================ FILE: ext/nmatrix/math/getrs.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == getrs.h // // getrs function in native C++. // /* * Automatically Tuned Linear Algebra Software v3.8.4 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the ATLAS group or the names of its contributers may * not be used to endorse or promote products derived from this * software without specific written permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#ifndef GETRS_H
#define GETRS_H

namespace nm { namespace math {

/*
 * Solves a system of linear equations A*X = B with a general NxN matrix A using the LU factorization computed by GETRF.
 *
 * A/lda hold the LU factors as produced by getrf; ipiv holds the pivot
 * indices; B (N x NRHS) is overwritten with the solution X.  Always returns 0.
 *
 * From ATLAS 3.8.0.
 *
 * NOTE(review): the template parameter list (apparently <typename DType>) seems
 * to have been stripped during extraction.
 */
template
int getrs(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans, const int N, const int NRHS, const DType* A,
          const int lda, const int* ipiv, DType* B, const int ldb)
{

  // enum CBLAS_DIAG Lunit, Uunit; // These aren't used. Not sure why they're declared in ATLAS' src.

  if (!N || !NRHS) return 0;  // empty system: nothing to solve

  const DType ONE = 1;

  if (Order == CblasColMajor) {
    if (Trans == CblasNoTrans) {
      // A = P*L*U: apply the pivots to B, then solve L*Z = B and U*X = Z.
      nm::math::laswp(NRHS, B, ldb, 0, N, ipiv, 1);
      nm::math::trsm(Order, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, N, NRHS, ONE, A, lda, B, ldb);
      nm::math::trsm(Order, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, N, NRHS, ONE, A, lda, B, ldb);
    } else {
      // Transposed system: triangular solves in reverse order, then undo pivots.
      nm::math::trsm(Order, CblasLeft, CblasUpper, Trans, CblasNonUnit, N, NRHS, ONE, A, lda, B, ldb);
      nm::math::trsm(Order, CblasLeft, CblasLower, Trans, CblasUnit, N, NRHS, ONE, A, lda, B, ldb);
      nm::math::laswp(NRHS, B, ldb, 0, N, ipiv, -1);
    }
  } else {
    // Row-major storage: ATLAS recasts the solve as right-side solves with the
    // triangle/transpose arguments flipped, rather than transposing A itself.
    if (Trans == CblasNoTrans) {
      nm::math::trsm(Order, CblasRight, CblasLower, CblasTrans, CblasNonUnit, NRHS, N, ONE, A, lda, B, ldb);
      nm::math::trsm(Order, CblasRight, CblasUpper, CblasTrans, CblasUnit, NRHS, N, ONE, A, lda, B, ldb);
      nm::math::laswp(NRHS, B, ldb, 0, N, ipiv, -1);
    } else {
      nm::math::laswp(NRHS, B, ldb, 0, N, ipiv, 1);
      nm::math::trsm(Order, CblasRight, CblasUpper, CblasNoTrans, CblasUnit, NRHS, N, ONE, A, lda, B, ldb);
      nm::math::trsm(Order, CblasRight, CblasLower, CblasNoTrans, CblasNonUnit, NRHS, N, ONE, A, lda, B, ldb);
    }
  }

  return 0;
}

/*
 * Function signature conversion for calling LAPACK's getrs functions as directly as possible.
 *
 * For documentation: http://www.netlib.org/lapack/double/dgetrs.f
 *
 * This function should normally go in math.cpp, but we need it to be available to nmatrix.cpp.
*/ template inline int clapack_getrs(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE trans, const int n, const int nrhs, const void* a, const int lda, const int* ipiv, void* b, const int ldb) { return getrs(order, trans, n, nrhs, reinterpret_cast(a), lda, ipiv, reinterpret_cast(b), ldb); } } } // end nm::math #endif // GETRS_H ================================================ FILE: ext/nmatrix/math/imax.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - present, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - present, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == imax.h // // BLAS level 1 function imax. 
// #ifndef IMAX_H #define IMAX_H #include "math/magnitude.h" namespace nm { namespace math { template inline int imax(const int n, const DType *x, const int incx) { if (n < 1 || incx <= 0) { return -1; } if (n == 1) { return 0; } typename MagnitudeDType::type dmax; int imax = 0; if (incx == 1) { // if incrementing by 1 dmax = magnitude(x[0]); for (int i = 1; i < n; ++i) { if (magnitude(x[i]) > dmax) { imax = i; dmax = magnitude(x[i]); } } } else { // if incrementing by more than 1 dmax = magnitude(x[0]); for (int i = 1, ix = incx; i < n; ++i, ix += incx) { if (magnitude(x[ix]) > dmax) { imax = i; dmax = magnitude(x[ix]); } } } return imax; } template inline int cblas_imax(const int n, const void* x, const int incx) { return imax(n, reinterpret_cast(x), incx); } }} // end of namespace nm::math #endif /* IMAX_H */ ================================================ FILE: ext/nmatrix/math/laswp.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == laswp.h // // laswp function in native C++. // /* * Automatically Tuned Linear Algebra Software v3.8.4 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions, and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *   3. The name of the ATLAS group or the names of its contributers may
 *      not be used to endorse or promote products derived from this
 *      software without specific written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#ifndef LASWP_H
#define LASWP_H

namespace nm { namespace math {

/*
 * ATLAS function which performs row interchanges on a general rectangular matrix. Modeled after the LAPACK LASWP function.
 *
 * Swaps entries between positions i and piv[i] for i in [K1, K2), walking
 * forward or backward through piv depending on the sign of inci.  The N
 * entries per interchange are processed in panels of 32 (plus a remainder)
 * so each panel makes a single pass over A.
 *
 * This version is templated for use by template <> getrf().
 *
 * NOTE(review): the template parameter list (apparently <typename DType>) seems
 * to have been stripped during extraction.
 */
template
inline void laswp(const int N, DType* A, const int lda, const int K1, const int K2, const int *piv, const int inci) {

  //const int n = K2 - K1; // not sure why this is declared. commented it out because it's unused.

  int nb = N >> 5;            // number of full 32-wide panels

  const int mr = N - (nb<<5); // leftover width after the full panels
  const int incA = lda << 5;  // distance (in elements) to the next panel

  if (K2 < K1) return;

  int i1, i2;
  if (inci < 0) {
    // Negative increment: apply the interchanges in reverse order.
    piv -= (K2-1) * inci;
    i1 = K2 - 1;
    i2 = K1;
  } else {
    piv += K1 * inci;
    i1 = K1;
    i2 = K2-1;
  }

  if (nb) {

    do {
      const int* ipiv = piv;
      int i = i1;
      int KeepOn;

      do {
        int ip = *ipiv; ipiv += inci;

        if (ip != i) {
          // Swap the 32 entries of this panel belonging to positions i and ip,
          // stepping by lda between consecutive entries.
          DType *a0 = &(A[i]),
                *a1 = &(A[ip]);

          for (int h = 32; h; h--) {
            DType r = *a0;
            *a0 = *a1;
            *a1 = r;

            a0 += lda;
            a1 += lda;
          }

        }

        if (inci > 0) KeepOn = (++i <= i2);
        else          KeepOn = (--i >= i2);

      } while (KeepOn);
      A += incA;  // advance to the next 32-wide panel
    } while (--nb);

  }

  if (mr) {
    // Remainder panel: same interchange loop over the final mr entries.
    const int* ipiv = piv;
    int i = i1;
    int KeepOn;

    do {
      int ip = *ipiv; ipiv += inci;

      if (ip != i) {
        DType *a0 = &(A[i]),
              *a1 = &(A[ip]);

        for (int h = mr; h; h--) {
          DType r = *a0;
          *a0 = *a1;
          *a1 = r;

          a0 += lda;
          a1 += lda;
        }
      }

      if (inci > 0) KeepOn = (++i <= i2);
      else          KeepOn = (--i >= i2);

    } while (KeepOn);
  }
}

/*
 * Function signature conversion for calling LAPACK's laswp functions as directly as possible.
 *
 * For documentation: http://www.netlib.org/lapack/double/dlaswp.f
 *
 * This function should normally go in math.cpp, but we need it to be available to nmatrix.cpp.
 */
template
inline void clapack_laswp(const int n, void* a, const int lda, const int k1, const int k2, const int* ipiv, const int incx) {
  laswp(n, reinterpret_cast(a), lda, k1, k2, ipiv, incx);
}

} } // namespace nm::math

#endif // LASWP_H



================================================
FILE: ext/nmatrix/math/long_dtype.h
================================================
/////////////////////////////////////////////////////////////////////
// = NMatrix
//
// A linear algebra library for scientific computation in Ruby.
// NMatrix is part of SciRuby.
// // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - present, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - present, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == long_dtype.h // // Declarations necessary for the native versions of GEMM and GEMV, // as well as for IMAX. // #ifndef LONG_DTYPE_H #define LONG_DTYPE_H namespace nm { namespace math { // These allow an increase in precision for intermediate values of gemm and gemv. // See also: http://stackoverflow.com/questions/11873694/how-does-one-increase-precision-in-c-templates-in-a-template-typename-dependen template struct LongDType; template <> struct LongDType { typedef int16_t type; }; template <> struct LongDType { typedef int16_t type; }; template <> struct LongDType { typedef int32_t type; }; template <> struct LongDType { typedef int64_t type; }; template <> struct LongDType { typedef int64_t type; }; template <> struct LongDType { typedef double type; }; template <> struct LongDType { typedef double type; }; template <> struct LongDType { typedef Complex128 type; }; template <> struct LongDType { typedef Complex128 type; }; template <> struct LongDType { typedef RubyObject type; }; template struct MagnitudeDType; template <> struct MagnitudeDType { typedef uint8_t type; }; template <> struct MagnitudeDType { typedef int8_t type; }; template <> struct MagnitudeDType { typedef int16_t type; }; template <> struct MagnitudeDType { typedef int32_t type; }; template <> struct MagnitudeDType { typedef int64_t type; }; template <> struct MagnitudeDType { typedef float type; }; template <> struct MagnitudeDType { 
typedef double type; }; template <> struct MagnitudeDType { typedef float type; }; template <> struct MagnitudeDType { typedef double type; }; template <> struct MagnitudeDType { typedef RubyObject type; }; }} // end of namespace nm::math #endif ================================================ FILE: ext/nmatrix/math/magnitude.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - present, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - present, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == math/magnitude.h // // Takes the absolute value (meaning magnitude) of each DType. // Needed for a variety of BLAS/LAPACK functions. 
// #ifndef MAGNITUDE_H #define MAGNITUDE_H #include "math/long_dtype.h" namespace nm { namespace math { /* Magnitude -- may be complicated for unsigned types, and need to call the correct STL abs for floats/doubles */ template ::type> inline MDType magnitude(const DType& v) { return v.abs(); } template <> inline float magnitude(const float& v) { return std::abs(v); } template <> inline double magnitude(const double& v) { return std::abs(v); } template <> inline uint8_t magnitude(const uint8_t& v) { return v; } template <> inline int8_t magnitude(const int8_t& v) { return std::abs(v); } template <> inline int16_t magnitude(const int16_t& v) { return std::abs(v); } template <> inline int32_t magnitude(const int32_t& v) { return std::abs(v); } template <> inline int64_t magnitude(const int64_t& v) { return std::abs(v); } template <> inline float magnitude(const nm::Complex64& v) { return std::sqrt(v.r * v.r + v.i * v.i); } template <> inline double magnitude(const nm::Complex128& v) { return std::sqrt(v.r * v.r + v.i * v.i); } }} #endif // MAGNITUDE_H ================================================ FILE: ext/nmatrix/math/math.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. 
// // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == math.h // // Header file for math functions, interfacing with BLAS, etc. // // For instructions on adding CBLAS and CLAPACK functions, see the // beginning of math.cpp. // // Some of these functions are from ATLAS. Here is the license for // ATLAS: // /* * Automatically Tuned Linear Algebra Software v3.8.4 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the ATLAS group or the names of its contributers may * not be used to endorse or promote products derived from this * software without specific written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#ifndef MATH_H
#define MATH_H

/*
 * Standard Includes
 */

#include "cblas_enums.h"

#include <algorithm> // std::min, std::max
#include <limits>    // std::numeric_limits
#include <memory>    // std::unique_ptr

/*
 * Project Includes
 */

/*
 * Macros
 */
#define REAL_RECURSE_LIMIT 4

/*
 * Data
 */

extern "C" {
  /*
   * C accessors.
   */

  void nm_math_transpose_generic(const size_t M, const size_t N, const void* A, const int lda, void* B, const int ldb, size_t element_size);
  void nm_math_init_blas(void);

  /*
   * Pure math implementations.
   */
  void nm_math_solve(VALUE lu, VALUE b, VALUE x, VALUE ipiv);
  void nm_math_inverse(const int M, void* A_elements, nm::dtype_t dtype);
  void nm_math_hessenberg(VALUE a);
  void nm_math_det_exact_from_dense(const int M, const void* elements, const int lda, nm::dtype_t dtype, void* result);
  void nm_math_det_exact_from_yale(const int M, const YALE_STORAGE* storage, const int lda, nm::dtype_t dtype, void* result);
  void nm_math_inverse_exact_from_dense(const int M, const void* A_elements, const int lda, void* B_elements, const int ldb, nm::dtype_t dtype);
  void nm_math_inverse_exact_from_yale(const int M, const YALE_STORAGE* storage, const int lda, YALE_STORAGE* inverse, const int ldb, nm::dtype_t dtype);
}

namespace nm { namespace math {

/*
 * Types
 */

/*
 * Functions
 */

// Yale: numeric matrix multiply c=a*b
//
// SMMP-style sparse product for "new Yale" storage: ia/ja/a describe the
// n x m left operand, ib/jb/b the m x l right operand; ic/jc/c receive the
// result.  diag{a,b,c} indicate whether the respective matrix stores its
// diagonal separately (new Yale format).  ic[0] is NOT written here; jc/c are
// filled starting at offset n+1.
//
// NOTE(review): the template parameter list (apparently
// <typename DType, typename IType>) and the <IType[]>/<DType[]>/<IType>
// arguments of unique_ptr/numeric_limits seem to have been stripped during
// extraction.
template
inline void numbmm(const unsigned int n, const unsigned int m, const unsigned int l, const IType* ia, const IType* ja, const DType* a, const bool diaga,
                   const IType* ib, const IType* jb, const DType* b, const bool diagb, IType* ic, IType* jc, DType* c, const bool diagc) {
  // Scratch: `next` threads a singly-linked list through the columns touched
  // in the current output row; `sums` accumulates the dot products for them.
  const unsigned int max_lmn = std::max(std::max(m, n), l);
  std::unique_ptr next(new IType[max_lmn]);
  std::unique_ptr sums(new DType[max_lmn]);

  DType v;

  IType head, length, temp, ndnz = 0;

  IType minmn = std::min(m,n);
  IType minlm = std::min(l,m);

  for (IType idx = 0; idx < max_lmn; ++idx) { // initialize scratch arrays
    next[idx] = std::numeric_limits::max();
    sums[idx] = 0;
  }

  for (IType i = 0; i < n; ++i) { // walk down the rows
    head = std::numeric_limits::max()-1; // head gets assigned as whichever column of B's row j we last visited
    length = 0;

    for (IType jj = ia[i]; jj <= ia[i+1]; ++jj) { // walk through entries in each row
      IType j;

      if (jj == ia[i+1]) { // if we're in the last entry for this row:
        if (!diaga || i >= minmn) continue;
        j   = i;      // if it's a new Yale matrix, and last entry, get the diagonal position (j) and entry (ajj)
        v   = a[i];
      } else {
        j   = ja[jj]; // if it's not the last entry for this row, get the column (j) and entry (ajj)
        v   = a[jj];
      }

      for (IType kk = ib[j]; kk <= ib[j+1]; ++kk) {

        IType k;

        if (kk == ib[j+1]) { // Get the column id for that entry
          if (!diagb || j >= minlm) continue;
          k  = j;
          sums[k] += v*b[k];
        } else {
          k  = jb[kk];
          sums[k] += v*b[kk];
        }

        // First time column k is touched in this row: push it on the list.
        if (next[k] == std::numeric_limits::max()) {
          next[k] = head;
          head    = k;
          ++length;
        }
      } // end of kk loop
    } // end of jj loop

    // Drain the linked list: emit each nonzero accumulated column, resetting
    // the scratch entries as we go.
    for (IType jj = 0; jj < length; ++jj) {
      if (sums[head] != 0) {
        if (diagc && head == i) {
          c[head] = sums[head];       // diagonal goes to its reserved slot
        } else {
          jc[n+1+ndnz] = head;        // off-diagonals are appended after n+1
          c[n+1+ndnz]  = sums[head];
          ++ndnz;
        }
      }

      temp = head;
      head = next[head];

      next[temp] = std::numeric_limits::max();
      sums[temp] = 0;
    }

    ic[i+1] = n+1+ndnz;
  }
} /* numbmm_ */


/*
template inline void new_yale_matrix_multiply(const unsigned int m, const IType* ija, const DType* a, const IType* ijb, const DType* b, YALE_STORAGE* c_storage) {
  unsigned int n = c_storage->shape[0],
               l = c_storage->shape[1];

  // Create a working vector of dimension max(m,l,n) and initial value IType::max():
  std::vector mask(std::max(std::max(m,l),n), std::numeric_limits::max());

  for (IType i = 0; i < n; ++i) { // A.rows.each_index do |i|

    IType j, k;
    size_t ndnz;

    for (IType jj = ija[i]; jj <= ija[i+1]; ++jj) { // walk through column pointers for row i of A
      j = (jj == ija[i+1]) ? i : ija[jj];  // Get the current column index (handle diagonals last)

      if (j >= m) {
        if (j == ija[jj]) rb_raise(rb_eIndexError, "ija array for left-hand matrix contains an out-of-bounds column index %u at position %u", jj, j);
        else              break;
      }

      for (IType kk = ijb[j]; kk <= ijb[j+1]; ++kk) { // walk through column pointers for row j of B
        if (j >= m) continue; // first of all, does B *have* a row j?
        k = (kk == ijb[j+1]) ?
/*
        j : ijb[kk];  // Get the current column index (handle diagonals last)

        if (k >= l) {
          if (k == ijb[kk]) rb_raise(rb_eIndexError, "ija array for right-hand matrix contains an out-of-bounds column index %u at position %u", kk, k);
          else              break;
        }

        if (mask[k] == )
      }
    }
  }
}
*/

// Yale: Symbolic matrix multiply c=a*b
//
// Counts (and optionally records, via ic) the nonzero structure of the product
// of an n x m matrix (ia/ja) and an m x l matrix (ib/jb), both in "new Yale"
// format (diag{a,b,c} flag separately-stored diagonals).  Returns the storage
// size needed for the result; only ic is written, never ja-style arrays.
template <typename IType>
inline size_t symbmm(const unsigned int n, const unsigned int m, const unsigned int l,
                     const IType* ia, const IType* ja, const bool diaga,
                     const IType* ib, const IType* jb, const bool diagb,
                     IType* ic, const bool diagc) {
  unsigned int max_lmn = std::max(std::max(m,n), l);

  // INDEX in the SMMP paper.  Heap-allocated: the original declaration
  // (IType mask[max_lmn]) was a variable-length array, which is a compiler
  // extension rather than standard C++ and risks stack overflow for large
  // matrices.  This matches numbmm's heap-allocated scratch arrays.
  std::unique_ptr<IType[]> mask(new IType[max_lmn]);

  IType j, k; /* Local variables */
  size_t ndnz = n;

  for (IType idx = 0; idx < max_lmn; ++idx)
    mask[idx] = std::numeric_limits<IType>::max();

  if (ic) { // Only write to ic if it's supplied; otherwise, we're just counting.
    if (diagc)  ic[0] = n+1;
    else        ic[0] = 0;
  }

  IType minmn = std::min(m,n);
  IType minlm = std::min(l,m);

  for (IType i = 0; i < n; ++i) { // MAIN LOOP: through rows

    for (IType jj = ia[i]; jj <= ia[i+1]; ++jj) { // merge row lists, walking through columns in each row

      // j <- column index given by JA[jj], or handle diagonal.
      if (jj == ia[i+1]) { // Don't really do it the last time -- just handle diagonals in a new yale matrix.
        if (!diaga || i >= minmn) continue;
        j = i;
      } else j = ja[jj];

      for (IType kk = ib[j]; kk <= ib[j+1]; ++kk) { // Now walk through columns K of row J in matrix B.
        if (kk == ib[j+1]) {
          if (!diagb || j >= minlm) continue;
          k = j;
        } else k = jb[kk];

        // mask[k] == i marks column k as already counted for this row.
        if (mask[k] != i) {
          mask[k] = i;
          ++ndnz;
        }
      }
    }

    // If the diagonal of row i was never produced, it does not consume a slot
    // in a diagonal-storing result.
    if (diagc && mask[i] == std::numeric_limits<IType>::max()) --ndnz;

    if (ic) ic[i+1] = ndnz;
  }

  return ndnz;
} /* symbmm_ */

// In-place quicksort (from Wikipedia) -- called by smmp_sort_columns, below. All functions are inclusive of left, right.
namespace smmp_sort { const size_t THRESHOLD = 4; // switch to insertion sort for 4 elements or fewer template void print_array(DType* vals, IType* array, IType left, IType right) { for (IType i = left; i <= right; ++i) { std::cerr << array[i] << ":" << vals[i] << " "; } std::cerr << std::endl; } template IType partition(DType* vals, IType* array, IType left, IType right, IType pivot) { IType pivotJ = array[pivot]; DType pivotV = vals[pivot]; // Swap pivot and right array[pivot] = array[right]; vals[pivot] = vals[right]; array[right] = pivotJ; vals[right] = pivotV; IType store = left; for (IType idx = left; idx < right; ++idx) { if (array[idx] <= pivotJ) { // Swap i and store std::swap(array[idx], array[store]); std::swap(vals[idx], vals[store]); ++store; } } std::swap(array[store], array[right]); std::swap(vals[store], vals[right]); return store; } // Recommended to use the median of left, right, and mid for the pivot. template inline I median(I a, I b, I c) { if (a < b) { if (b < c) return b; // a b c if (a < c) return c; // a c b return a; // c a b } else { // a > b if (a < c) return a; // b a c if (b < c) return c; // b c a return b; // c b a } } // Insertion sort is more efficient than quicksort for small N template void insertion_sort(DType* vals, IType* array, IType left, IType right) { for (IType idx = left; idx <= right; ++idx) { IType col_to_insert = array[idx]; DType val_to_insert = vals[idx]; IType hole_pos = idx; for (; hole_pos > left && col_to_insert < array[hole_pos-1]; --hole_pos) { array[hole_pos] = array[hole_pos - 1]; // shift the larger column index up vals[hole_pos] = vals[hole_pos - 1]; // value goes along with it } array[hole_pos] = col_to_insert; vals[hole_pos] = val_to_insert; } } template void quicksort(DType* vals, IType* array, IType left, IType right) { if (left < right) { if (right - left < THRESHOLD) { insertion_sort(vals, array, left, right); } else { // choose any pivot such that left < pivot < right IType pivot = median(left, 
right, (IType)(((unsigned long)left + (unsigned long)right) / 2)); pivot = partition(vals, array, left, right, pivot); // recursively sort elements smaller than the pivot quicksort(vals, array, left, pivot-1); // recursively sort elements at least as big as the pivot quicksort(vals, array, pivot+1, right); } } } }; // end of namespace smmp_sort /* * For use following symbmm and numbmm. Sorts the matrix entries in each row according to the column index. * This utilizes quicksort, which is an in-place unstable sort (since there are no duplicate entries, we don't care * about stability). * * TODO: It might be worthwhile to do a test for free memory, and if available, use an unstable sort that isn't in-place. * * TODO: It's actually probably possible to write an even faster sort, since symbmm/numbmm are not producing a random * ordering. If someone is doing a lot of Yale matrix multiplication, it might benefit them to consider even insertion * sort. */ template inline void smmp_sort_columns(const size_t n, const IType* ia, IType* ja, DType* a) { for (size_t i = 0; i < n; ++i) { if (ia[i+1] - ia[i] < 2) continue; // no need to sort rows containing only one or two elements. else if (ia[i+1] - ia[i] <= smmp_sort::THRESHOLD) { smmp_sort::insertion_sort(a, ja, ia[i], ia[i+1]-1); // faster for small rows } else { smmp_sort::quicksort(a, ja, ia[i], ia[i+1]-1); // faster for large rows (and may call insertion_sort as well) } } } // Copies an upper row-major array from U, zeroing U; U is unit, so diagonal is not copied. // // From ATLAS 3.8.0. template static inline void trcpzeroU(const int M, const int N, DType* U, const int ldu, DType* C, const int ldc) { for (int i = 0; i != M; ++i) { for (int j = i+1; j < N; ++j) { C[j] = U[j]; U[j] = 0; } C += ldc; U += ldu; } } /* * Un-comment the following lines when we figure out how to calculate NB for each of the ATLAS-derived * functions. This is probably really complicated. 
* * Also needed: ATL_MulByNB, ATL_DivByNB (both defined in the build process for ATLAS), and ATL_mmMU. * */ /* template static int trtri_4(const enum CBLAS_DIAG Diag, DType* A, const int lda) { if (RowMajor) { DType *pA0 = A, *pA1 = A+lda, *pA2 = A+2*lda, *pA3 = A+3*lda; DType tmp; if (Upper) { DType A01 = pA0[1], A02 = pA0[2], A03 = pA0[3], A12 = pA1[2], A13 = pA1[3], A23 = pA2[3]; if (Diag == CblasNonUnit) { pA0->inverse(); (pA1+1)->inverse(); (pA2+2)->inverse(); (pA3+3)->inverse(); pA0[1] = -A01 * pA1[1] * pA0[0]; pA1[2] = -A12 * pA2[2] * pA1[1]; pA2[3] = -A23 * pA3[3] * pA2[2]; pA0[2] = -(A01 * pA1[2] + A02 * pA2[2]) * pA0[0]; pA1[3] = -(A12 * pA2[3] + A13 * pA3[3]) * pA1[1]; pA0[3] = -(A01 * pA1[3] + A02 * pA2[3] + A03 * pA3[3]) * pA0[0]; } else { pA0[1] = -A01; pA1[2] = -A12; pA2[3] = -A23; pA0[2] = -(A01 * pA1[2] + A02); pA1[3] = -(A12 * pA2[3] + A13); pA0[3] = -(A01 * pA1[3] + A02 * pA2[3] + A03); } } else { // Lower DType A10 = pA1[0], A20 = pA2[0], A21 = pA2[1], A30 = PA3[0], A31 = pA3[1], A32 = pA3[2]; DType *B10 = pA1, *B20 = pA2, *B30 = pA3, *B21 = pA2+1, *B31 = pA3+1, *B32 = pA3+2; if (Diag == CblasNonUnit) { pA0->inverse(); (pA1+1)->inverse(); (pA2+2)->inverse(); (pA3+3)->inverse(); *B10 = -A10 * pA0[0] * pA1[1]; *B21 = -A21 * pA1[1] * pA2[2]; *B32 = -A32 * pA2[2] * pA3[3]; *B20 = -(A20 * pA0[0] + A21 * (*B10)) * pA2[2]; *B31 = -(A31 * pA1[1] + A32 * (*B21)) * pA3[3]; *B30 = -(A30 * pA0[0] + A31 * (*B10) + A32 * (*B20)) * pA3; } else { *B10 = -A10; *B21 = -A21; *B32 = -A32; *B20 = -(A20 + A21 * (*B10)); *B31 = -(A31 + A32 * (*B21)); *B30 = -(A30 + A31 * (*B10) + A32 * (*B20)); } } } else { rb_raise(rb_eNotImpError, "only row-major implemented at this time"); } return 0; } template static int trtri_3(const enum CBLAS_DIAG Diag, DType* A, const int lda) { if (RowMajor) { DType tmp; if (Upper) { DType A01 = pA0[1], A02 = pA0[2], A03 = pA0[3], A12 = pA1[2], A13 = pA1[3]; DType *B01 = pA0 + 1, *B02 = pA0 + 2, *B12 = pA1 + 2; if (Diag == CblasNonUnit) { 
pA0->inverse(); (pA1+1)->inverse(); (pA2+2)->inverse(); *B01 = -A01 * pA1[1] * pA0[0]; *B12 = -A12 * pA2[2] * pA1[1]; *B02 = -(A01 * (*B12) + A02 * pA2[2]) * pA0[0]; } else { *B01 = -A01; *B12 = -A12; *B02 = -(A01 * (*B12) + A02); } } else { // Lower DType *pA0=A, *pA1=A+lda, *pA2=A+2*lda; DType A10=pA1[0], A20=pA2[0], A21=pA2[1]; DType *B10 = pA1, *B20 = pA2; *B21 = pA2+1; if (Diag == CblasNonUnit) { pA0->inverse(); (pA1+1)->inverse(); (pA2+2)->inverse(); *B10 = -A10 * pA0[0] * pA1[1]; *B21 = -A21 * pA1[1] * pA2[2]; *B20 = -(A20 * pA0[0] + A21 * (*B10)) * pA2[2]; } else { *B10 = -A10; *B21 = -A21; *B20 = -(A20 + A21 * (*B10)); } } } else { rb_raise(rb_eNotImpError, "only row-major implemented at this time"); } return 0; } template static void trtri(const enum CBLAS_DIAG Diag, const int N, DType* A, const int lda) { DType *Age, *Atr; DType tmp; int Nleft, Nright; int ierr = 0; static const DType ONE = 1; static const DType MONE -1; static const DType NONE = -1; if (RowMajor) { // FIXME: Use REAL_RECURSE_LIMIT here for float32 and float64 (instead of 1) if ((Real && N > REAL_RECURSE_LIMIT) || (N > 1)) { Nleft = N >> 1; #ifdef NB if (Nleft > NB) NLeft = ATL_MulByNB(ATL_DivByNB(Nleft)); #endif Nright = N - Nleft; if (Upper) { Age = A + Nleft; Atr = A + (Nleft * (lda+1)); nm::math::trsm(CblasRowMajor, CblasRight, CblasUpper, CblasNoTrans, Diag, Nleft, Nright, ONE, Atr, lda, Age, lda); nm::math::trsm(CblasRowMajor, CblasLeft, CblasUpper, CblasNoTrans, Diag, Nleft, Nright, MONE, A, lda, Age, lda); } else { // Lower Age = A + ((Nleft*lda)); Atr = A + (Nleft * (lda+1)); nm::math::trsm(CblasRowMajor, CblasRight, CblasLower, CblasNoTrans, Diag, Nright, Nleft, ONE, A, lda, Age, lda); nm::math::trsm(CblasRowMajor, CblasLeft, CblasLower, CblasNoTrans, Diag, Nright, Nleft, MONE, Atr, lda, Age, lda); } ierr = trtri(Diag, Nleft, A, lda); if (ierr) return ierr; ierr = trtri(Diag, Nright, Atr, lda); if (ierr) return ierr + Nleft; } else { if (Real) { if (N == 4) { return 
trtri_4(Diag, A, lda); } else if (N == 3) { return trtri_3(Diag, A, lda); } else if (N == 2) { if (Diag == CblasNonUnit) { A->inverse(); (A+(lda+1))->inverse(); if (Upper) { *(A+1) *= *A; // TRI_MUL *(A+1) *= *(A+lda+1); // TRI_MUL } else { *(A+lda) *= *A; // TRI_MUL *(A+lda) *= *(A+lda+1); // TRI_MUL } } if (Upper) *(A+1) = -*(A+1); // TRI_NEG else *(A+lda) = -*(A+lda); // TRI_NEG } else if (Diag == CblasNonUnit) A->inverse(); } else { // not real if (Diag == CblasNonUnit) A->inverse(); } } } else { rb_raise(rb_eNotImpError, "only row-major implemented at this time"); } return ierr; } template int getri(const int N, DType* A, const int lda, const int* ipiv, DType* wrk, const int lwrk) { if (!RowMajor) rb_raise(rb_eNotImpError, "only row-major implemented at this time"); int jb, nb, I, ndown, iret; const DType ONE = 1, NONE = -1; int iret = trtri(CblasNonUnit, N, A, lda); if (!iret && N > 1) { jb = lwrk / N; if (jb >= NB) nb = ATL_MulByNB(ATL_DivByNB(jb)); else if (jb >= ATL_mmMU) nb = (jb/ATL_mmMU)*ATL_mmMU; else nb = jb; if (!nb) return -6; // need at least 1 row of workspace // only first iteration will have partial block, unroll it jb = N - (N/nb) * nb; if (!jb) jb = nb; I = N - jb; A += lda * I; trcpzeroU(jb, jb, A+I, lda, wrk, jb); nm::math::trsm(CblasRowMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasUnit, jb, N, ONE, wrk, jb, A, lda); if (I) { do { I -= nb; A -= nb * lda; ndown = N-I; trcpzeroU(nb, ndown, A+I, lda, wrk, ndown); nm::math::gemm(CblasRowMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasUnit, nb, N, ONE, wrk, ndown, A, lda); } while (I); } // Apply row interchanges for (I = N - 2; I >= 0; --I) { jb = ipiv[I]; if (jb != I) nm::math::swap(N, A+I*lda, 1, A+jb*lda, 1); } } return iret; } */ /* * Macro for declaring LAPACK specializations of the getrf function. * * type is the DType; call is the specific function to call; cast_as is what the DType* should be * cast to in order to pass it to LAPACK. 
*/ #define LAPACK_GETRF(type, call, cast_as) \ template <> \ inline int getrf(const enum CBLAS_ORDER Order, const int M, const int N, type * A, const int lda, int* ipiv) { \ int info = call(Order, M, N, reinterpret_cast(A), lda, ipiv); \ if (!info) return info; \ else { \ rb_raise(rb_eArgError, "getrf: problem with argument %d\n", info); \ return info; \ } \ } /* Specialize for ATLAS types */ /*LAPACK_GETRF(float, clapack_sgetrf, float) LAPACK_GETRF(double, clapack_dgetrf, double) LAPACK_GETRF(Complex64, clapack_cgetrf, void) LAPACK_GETRF(Complex128, clapack_zgetrf, void) */ }} // end namespace nm::math #endif // MATH_H ================================================ FILE: ext/nmatrix/math/nrm2.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - present, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - present, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == nrm2.h // // CBLAS nrm2 function // /* * Automatically Tuned Linear Algebra Software v3.8.4 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the ATLAS group or the names of its contributers may * not be used to endorse or promote products derived from this * software without specific written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */ #ifndef NRM2_H # define NRM2_H #include "math/long_dtype.h" namespace nm { namespace math { /* * Level 1 BLAS routine which returns the 2-norm of an n-vector x. 
# * Based on input types, these are the valid return types: * int -> int * float -> float or double * double -> double * complex64 -> float or double * complex128 -> double */ template ::type> MDType nrm2(const int N, const DType* X, const int incX) { const DType ONE = 1, ZERO = 0; typename LongDType::type scale = 0, ssq = 1, absxi, temp; if ((N < 1) || (incX < 1)) return ZERO; else if (N == 1) return std::abs(X[0]); for (int i = 0; i < N; ++i) { absxi = std::abs(X[i*incX]); if (scale < absxi) { temp = scale / absxi; scale = absxi; ssq = ONE + ssq * (temp * temp); } else if(scale != 0) { temp = absxi / scale; ssq += temp * temp; } } return (MDType)(scale * std::sqrt( ssq )); } template static inline void nrm2_complex_helper(const FloatDType& xr, const FloatDType& xi, double& scale, double& ssq) { double absx = std::abs(xr); if (scale < absx) { double temp = scale / absx; scale = absx; ssq = 1.0 + ssq * (temp * temp); } else if(scale != 0) { double temp = absx / scale; ssq += temp * temp; } absx = std::abs(xi); if (scale < absx) { double temp = scale / absx; scale = absx; ssq = 1.0 + ssq * (temp * temp); } else if(scale != 0) { double temp = absx / scale; ssq += temp * temp; } } template <> float nrm2(const int N, const Complex64* X, const int incX) { double scale = 0, ssq = 1; if ((N < 1) || (incX < 1)) return 0.0; for (int i = 0; i < N; ++i) { nrm2_complex_helper(X[i*incX].r, X[i*incX].i, scale, ssq); } return scale * std::sqrt( ssq ); } // FIXME: Function above is duplicated here, should be writeable as a template using // FIXME: xMagnitudeDType. 
template <> double nrm2(const int N, const Complex128* X, const int incX) { double scale = 0, ssq = 1; if ((N < 1) || (incX < 1)) return 0.0; for (int i = 0; i < N; ++i) { nrm2_complex_helper(X[i*incX].r, X[i*incX].i, scale, ssq); } return scale * std::sqrt( ssq ); } template ::type> inline void cblas_nrm2(const int N, const void* X, const int incX, void* result) { *reinterpret_cast( result ) = nrm2( N, reinterpret_cast(X), incX ); } }} // end of namespace nm::math #endif // NRM2_H ================================================ FILE: ext/nmatrix/math/rot.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == rot.h // // BLAS rot function in native C++. // /* * Automatically Tuned Linear Algebra Software v3.8.4 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* 3. The name of the ATLAS group or the names of its contributers may * not be used to endorse or promote products derived from this * software without specific written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */ #ifndef ROT_H # define ROT_H namespace nm { namespace math { // TODO: Test this to see if it works properly on complex. ATLAS has a separate algorithm for complex, which looks like // TODO: it may actually be the same one. // // This function is called ATL_rot in ATLAS 3.8.4. template inline void rot_helper(const int N, DType* X, const int incX, DType* Y, const int incY, const DType c, const DType s) { if (c != 1 || s != 0) { if (incX == 1 && incY == 1) { for (int i = 0; i != N; ++i) { DType tmp = X[i] * c + Y[i] * s; Y[i] = Y[i] * c - X[i] * s; X[i] = tmp; } } else { for (int i = N; i > 0; --i, Y += incY, X += incX) { DType tmp = *X * c + *Y * s; *Y = *Y * c - *X * s; *X = tmp; } } } } /* Applies a plane rotation. From ATLAS 3.8.4. 
*/ template inline void rot(const int N, DType* X, const int incX, DType* Y, const int incY, const CSDType c, const CSDType s) { int incx = incX, incy = incY; DType *x = X, *y = Y; if (N > 0) { if (incX < 0) { if (incY < 0) { incx = -incx; incy = -incy; } else x += -incX * (N-1); } else if (incY < 0) { incy = -incy; incx = -incx; x += (N-1) * incX; } rot_helper(N, x, incx, y, incy, c, s); } } template inline void cblas_rot(const int N, void* X, const int incX, void* Y, const int incY, const void* c, const void* s) { rot(N, reinterpret_cast(X), incX, reinterpret_cast(Y), incY, *reinterpret_cast(c), *reinterpret_cast(s)); } } } //nm::math #endif // ROT_H ================================================ FILE: ext/nmatrix/math/rotg.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == rotg.h // // BLAS rotg function in native C++. // /* * Automatically Tuned Linear Algebra Software v3.8.4 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the ATLAS group or the names of its contributers may * not be used to endorse or promote products derived from this * software without specific written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */ #ifndef ROTG_H # define ROTG_H namespace nm { namespace math { /* Givens plane rotation. From ATLAS 3.8.4. */ // FIXME: Not working properly for Ruby objects. template inline void rotg(DType* a, DType* b, DType* c, DType* s) { DType aa = std::abs(*a), ab = std::abs(*b); DType roe = aa > ab ? *a : *b; DType scal = aa + ab; if (scal == 0) { *c = 1; *s = *a = *b = 0; } else { DType t0 = aa / scal, t1 = ab / scal; DType r = scal * std::sqrt(t0 * t0 + t1 * t1); if (roe < 0) r = -r; *c = *a / r; *s = *b / r; DType z = (*c != 0) ? 
(1 / *c) : DType(1); *a = r; *b = z; } } template <> inline void rotg(Complex64* a, Complex64* b, Complex64* c, Complex64* s) { rb_raise(rb_eNotImpError, "BLAS not available, and existing template requires modification for complex"); } template <> inline void rotg(Complex128* a, Complex128* b, Complex128* c, Complex128* s) { rb_raise(rb_eNotImpError, "BLAS not available, and existing template requires modification for complex"); } template inline void cblas_rotg(void* a, void* b, void* c, void* s) { rotg(reinterpret_cast(a), reinterpret_cast(b), reinterpret_cast(c), reinterpret_cast(s)); } } } //nm::math #endif // ROTG_H ================================================ FILE: ext/nmatrix/math/scal.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == scal.h // // BLAS scal function. // #ifndef SCAL_H #define SCAL_H namespace nm { namespace math { /* Purpose */ /* ======= */ /* DSCAL scales a vector by a constant. */ /* uses unrolled loops for increment equal to one. */ /* Further Details */ /* =============== */ /* jack dongarra, linpack, 3/11/78. */ /* modified 3/93 to return if incx .le. 0. 
*/ /* modified 12/3/93, array(1) declarations changed to array(*) */ /* ===================================================================== */ template inline void scal(const int n, const DType scalar, DType* x, const int incx) { if (n <= 0 || incx <= 0) { return; } for (int i = 0; incx < 0 ? i > n*incx : i < n*incx; i += incx) { x[i] = scalar * x[i]; } } /* * Function signature conversion for LAPACK's scal function. */ template inline void cblas_scal(const int n, const void* scalar, void* x, const int incx) { scal(n, *reinterpret_cast(scalar), reinterpret_cast(x), incx); } }} // end of nm::math #endif ================================================ FILE: ext/nmatrix/math/trsm.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == trsm.h // // trsm function in native C++. // /* * Automatically Tuned Linear Algebra Software v3.8.4 * (C) Copyright 1999 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the ATLAS group or the names of its contributers may * not be used to endorse or promote products derived from this * software without specific written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */ #ifndef TRSM_H #define TRSM_H namespace nm { namespace math { /* * This version of trsm doesn't do any error checks and only works on column-major matrices. * * For row major, call trsm instead. That will handle necessary changes-of-variables * and parameter checks. * * Note that some of the boundary conditions here may be incorrect. Very little has been tested! * This was converted directly from dtrsm.f using f2c, and then rewritten more cleanly. 
*/
// NOTE(review): all indexing below is 1-based, inherited from the f2c
// translation of dtrsm.f: the `a -= 1 + lda; b -= 1 + ldb;` offsets shift
// the pointers so a[i + j*lda] / b[i + j*ldb] address (row i, column j),
// 1-based, of the column-major storage described in the file comment above.
template
inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
                         const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag,
                         const int m, const int n, const DType alpha, const DType* a,
                         const int lda, DType* b, const int ldb) {

  // (row-major) trsm: left upper trans nonunit m=3 n=1 1/1 a 3 b 3

  if (m == 0 || n == 0) return; /* Quick return if possible. */

  // Apply necessary offset
  a -= 1 + lda;
  b -= 1 + ldb;

  if (alpha == 0) { // Handle alpha == 0: B is simply zeroed.
    for (int j = 1; j <= n; ++j) {
      for (int i = 1; i <= m; ++i) {
        b[i + j * ldb] = 0;
      }
    }
    return;
  }

  if (side == CblasLeft) {
    if (trans_a == CblasNoTrans) {

      /* Form B := alpha*inv( A )*B. */
      if (uplo == CblasUpper) {
        // Upper triangular: back-substitution, one column of B at a time,
        // eliminating from the bottom row upward.
        for (int j = 1; j <= n; ++j) {
          if (alpha != 1) {
            for (int i = 1; i <= m; ++i) {
              b[i + j * ldb] = alpha * b[i + j * ldb];
            }
          }
          for (int k = m; k >= 1; --k) {
            if (b[k + j * ldb] != 0) {  // skip zero pivots in B: nothing to propagate
              if (diag == CblasNonUnit) {
                b[k + j * ldb] /= a[k + k * lda];
              }
              for (int i = 1; i <= k-1; ++i) {
                b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
              }
            }
          }
        }
      } else {
        // Lower triangular: forward substitution, top row downward.
        for (int j = 1; j <= n; ++j) {
          if (alpha != 1) {
            for (int i = 1; i <= m; ++i) {
              b[i + j * ldb] = alpha * b[i + j * ldb];
            }
          }
          for (int k = 1; k <= m; ++k) {
            if (b[k + j * ldb] != 0.) {
              if (diag == CblasNonUnit) {
                b[k + j * ldb] /= a[k + k * lda];
              }
              for (int i = k+1; i <= m; ++i) {
                b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
              }
            }
          }
        }
      }

    } else { // CblasTrans

      /* Form B := alpha*inv( A**T )*B. */
      if (uplo == CblasUpper) {
        // A**T of an upper matrix is lower: forward substitution using
        // column i of A as row i of A**T.
        for (int j = 1; j <= n; ++j) {
          for (int i = 1; i <= m; ++i) {
            DType temp = alpha * b[i + j * ldb];
            for (int k = 1; k <= i-1; ++k) { // limit was i-1. Lots of similar bugs in this code, probably.
              temp -= a[k + i * lda] * b[k + j * ldb];
            }
            if (diag == CblasNonUnit) {
              temp /= a[i + i * lda];
            }
            b[i + j * ldb] = temp;
          }
        }
      } else {
        // A**T of a lower matrix is upper: back-substitution from the bottom.
        for (int j = 1; j <= n; ++j) {
          for (int i = m; i >= 1; --i) {
            DType temp= alpha * b[i + j * ldb];
            for (int k = i+1; k <= m; ++k) {
              temp -= a[k + i * lda] * b[k + j * ldb];
            }
            if (diag == CblasNonUnit) {
              temp /= a[i + i * lda];
            }
            b[i + j * ldb] = temp;
          }
        }
      }
    }
  } else { // right side

    if (trans_a == CblasNoTrans) {

      /* Form B := alpha*B*inv( A ). */
      if (uplo == CblasUpper) {
        // Columns of B are processed left to right; column j depends only on
        // earlier columns (k <= j-1) through A's upper triangle.
        for (int j = 1; j <= n; ++j) {
          if (alpha != 1) {
            for (int i = 1; i <= m; ++i) {
              b[i + j * ldb] = alpha * b[i + j * ldb];
            }
          }
          for (int k = 1; k <= j-1; ++k) {
            if (a[k + j * lda] != 0) {  // skip structural zeros of A
              for (int i = 1; i <= m; ++i) {
                b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
              }
            }
          }
          if (diag == CblasNonUnit) {
            DType temp = 1 / a[j + j * lda];
            for (int i = 1; i <= m; ++i) {
              b[i + j * ldb] = temp * b[i + j * ldb];
            }
          }
        }
      } else {
        // Lower triangle: process columns right to left; column j depends on
        // later columns (k >= j+1).
        for (int j = n; j >= 1; --j) {
          if (alpha != 1) {
            for (int i = 1; i <= m; ++i) {
              b[i + j * ldb] = alpha * b[i + j * ldb];
            }
          }
          for (int k = j+1; k <= n; ++k) {
            if (a[k + j * lda] != 0.) {
              for (int i = 1; i <= m; ++i) {
                b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
              }
            }
          }
          if (diag == CblasNonUnit) {
            DType temp = 1 / a[j + j * lda];
            for (int i = 1; i <= m; ++i) {
              b[i + j * ldb] = temp * b[i + j * ldb];
            }
          }
        }
      }

    } else { // CblasTrans

      /* Form B := alpha*B*inv( A**T ). */
      if (uplo == CblasUpper) {
        // Work backward over columns k; column k is finalized (scaled by the
        // inverted diagonal) before being subtracted from earlier columns.
        for (int k = n; k >= 1; --k) {
          if (diag == CblasNonUnit) {
            DType temp= 1 / a[k + k * lda];
            for (int i = 1; i <= m; ++i) {
              b[i + k * ldb] = temp * b[i + k * ldb];
            }
          }
          for (int j = 1; j <= k-1; ++j) {
            if (a[j + k * lda] != 0.)
            {
              DType temp= a[j + k * lda];
              for (int i = 1; i <= m; ++i) {
                b[i + j * ldb] -= temp * b[i + k * ldb];
              }
            }
          }
          if (alpha != 1) {  // alpha is applied last, once column k is done
            for (int i = 1; i <= m; ++i) {
              b[i + k * ldb] = alpha * b[i + k * ldb];
            }
          }
        }
      } else {
        // Lower triangle: same scheme, walking columns forward.
        for (int k = 1; k <= n; ++k) {
          if (diag == CblasNonUnit) {
            DType temp = 1 / a[k + k * lda];
            for (int i = 1; i <= m; ++i) {
              b[i + k * ldb] = temp * b[i + k * ldb];
            }
          }
          for (int j = k+1; j <= n; ++j) {
            if (a[j + k * lda] != 0.) {
              DType temp = a[j + k * lda];
              for (int i = 1; i <= m; ++i) {
                b[i + j * ldb] -= temp * b[i + k * ldb];
              }
            }
          }
          if (alpha != 1) {
            for (int i = 1; i <= m; ++i) {
              b[i + k * ldb] = alpha * b[i + k * ldb];
            }
          }
        }
      }
    }
  }
}

/*
 * BLAS' DTRSM function, generalized.
 */
template ::value>::type>
inline void trsm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
                 const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag,
                 const int m, const int n, const DType alpha, const DType* a,
                 const int lda, DType* b, const int ldb)
{
  /*using std::cerr; using std::endl;*/
  int num_rows_a = n;
  if (side == CblasLeft) num_rows_a = m;

  if (lda < std::max(1,num_rows_a)) {
    fprintf(stderr, "TRSM: num_rows_a = %d; got lda=%d\n", num_rows_a, lda);
    rb_raise(rb_eArgError, "TRSM: Expected lda >= max(1, num_rows_a)");
  }

  // Test the input parameters.
  if (order == CblasRowMajor) {
    if (ldb < std::max(1,n)) {
      fprintf(stderr, "TRSM: M=%d; got ldb=%d\n", m, ldb);
      rb_raise(rb_eArgError, "TRSM: Expected ldb >= max(1,N)");
    }

    // For row major, need to switch side and uplo
    enum CBLAS_SIDE side_ = side == CblasLeft ? CblasRight : CblasLeft;
    enum CBLAS_UPLO uplo_ = uplo == CblasUpper ? CblasLower : CblasUpper;

    /*
    cerr << "(row-major) trsm: " << (side_ == CblasLeft ? "left " : "right ")
         << (uplo_ == CblasUpper ? "upper " : "lower ")
         << (trans_a == CblasTrans ? "trans " : "notrans ")
         << (diag == CblasNonUnit ?
"nonunit " : "unit ") << n << " " << m << " " << alpha << " a " << lda << " b " << ldb << endl; */ trsm_nothrow(side_, uplo_, trans_a, diag, n, m, alpha, a, lda, b, ldb); } else { // CblasColMajor if (ldb < std::max(1,m)) { fprintf(stderr, "TRSM: M=%d; got ldb=%d\n", m, ldb); rb_raise(rb_eArgError, "TRSM: Expected ldb >= max(1,M)"); } /* cerr << "(col-major) trsm: " << (side == CblasLeft ? "left " : "right ") << (uplo == CblasUpper ? "upper " : "lower ") << (trans_a == CblasTrans ? "trans " : "notrans ") << (diag == CblasNonUnit ? "nonunit " : "unit ") << m << " " << n << " " << alpha << " a " << lda << " b " << ldb << endl; */ trsm_nothrow(side, uplo, trans_a, diag, m, n, alpha, a, lda, b, ldb); } } } } // namespace nm::math #endif // TRSM_H ================================================ FILE: ext/nmatrix/math/util.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == util.h // // Collect a few utility functions which convert ruby symbols into arguments // that CBLAS or LAPACK can understand: either enum's for CBLAS or char's // for LAPACK. // #ifndef UTIL_H #define UTIL_H /* Interprets cblas argument which could be any of false/:no_transpose, :transpose, or :complex_conjugate, * into an enum recognized by cblas. 
* * Called by nm_cblas_gemm -- basically inline. * */ static inline enum CBLAS_TRANSPOSE blas_transpose_sym(VALUE op) { if (op == Qfalse || rb_to_id(op) == nm_rb_no_transpose) return CblasNoTrans; else if (rb_to_id(op) == nm_rb_transpose) return CblasTrans; else if (rb_to_id(op) == nm_rb_complex_conjugate) return CblasConjTrans; else rb_raise(rb_eArgError, "Expected false, :transpose, or :complex_conjugate"); return CblasNoTrans; } /* Interprets transpose argument which could be any of false/:no_transpose, :transpose, or :complex_conjugate, * into an character recognized by LAPACKE. LAPACKE uses a different system than CBLAS for this. * */ static inline char lapacke_transpose_sym(VALUE op) { if (op == Qfalse || rb_to_id(op) == nm_rb_no_transpose) return 'N'; else if (rb_to_id(op) == nm_rb_transpose) return 'T'; else if (rb_to_id(op) == nm_rb_complex_conjugate) return 'C'; else rb_raise(rb_eArgError, "Expected false, :transpose, or :complex_conjugate"); return 'N'; } /* * Interprets cblas argument which could be :left or :right * * Called by nm_cblas_trsm -- basically inline */ static inline enum CBLAS_SIDE blas_side_sym(VALUE op) { ID op_id = rb_to_id(op); if (op_id == nm_rb_left) return CblasLeft; if (op_id == nm_rb_right) return CblasRight; rb_raise(rb_eArgError, "Expected :left or :right for side argument"); return CblasLeft; } /* * Interprets the LAPACK side argument which could be :left or :right * * Related to obtaining Q in QR factorization after calling lapack_geqrf */ static inline char lapacke_side_sym(VALUE op) { ID op_id = rb_to_id(op); if (op_id == nm_rb_left) return 'L'; if (op_id == nm_rb_right) return 'R'; else rb_raise(rb_eArgError, "Expected :left or :right for side argument"); return 'L'; } /* * Interprets cblas argument which could be :upper or :lower * * Called by nm_cblas_trsm -- basically inline */ static inline enum CBLAS_UPLO blas_uplo_sym(VALUE op) { ID op_id = rb_to_id(op); if (op_id == nm_rb_upper) return CblasUpper; if (op_id == 
nm_rb_lower) return CblasLower; rb_raise(rb_eArgError, "Expected :upper or :lower for uplo argument"); return CblasUpper; } /* * Interprets argument which could be :upper or :lower for LAPACKE * * Called by nm_cblas_trsm -- basically inline */ static inline char lapacke_uplo_sym(VALUE op) { ID op_id = rb_to_id(op); if (op_id == nm_rb_upper) return 'U'; if (op_id == nm_rb_lower) return 'L'; rb_raise(rb_eArgError, "Expected :upper or :lower for uplo argument"); return 'U'; } /* * Interprets cblas argument which could be :unit (true) or :nonunit (false or anything other than true/:unit) * * Called by nm_cblas_trsm -- basically inline */ static inline enum CBLAS_DIAG blas_diag_sym(VALUE op) { if (rb_to_id(op) == nm_rb_unit || op == Qtrue) return CblasUnit; return CblasNonUnit; } /* * Interprets cblas argument which could be :row or :col * * This function, unlike the other ones, works for LAPACKE as well as for CBLAS/CLAPACK. * Although LAPACKE calls this an int instead of a enum, the magic values are the same * (101 for row-major, 102 for column-major). */ static inline enum CBLAS_ORDER blas_order_sym(VALUE op) { if (rb_to_id(op) == rb_intern("row") || rb_to_id(op) == rb_intern("row_major")) return CblasRowMajor; else if (rb_to_id(op) == rb_intern("col") || rb_to_id(op) == rb_intern("col_major") || rb_to_id(op) == rb_intern("column") || rb_to_id(op) == rb_intern("column_major")) return CblasColMajor; rb_raise(rb_eArgError, "Expected :row or :col for order argument"); return CblasRowMajor; } /* * Interprets lapack jobu and jobvt arguments, for which LAPACK needs character values A, S, O, or N. * * Called by lapack_gesvd -- basically inline. svd stands for singular value decomposition. 
*/ static inline char lapack_svd_job_sym(VALUE op) { if (rb_to_id(op) == rb_intern("all") || rb_to_id(op) == rb_intern("a")) return 'A'; else if (rb_to_id(op) == rb_intern("return") || rb_to_id(op) == rb_intern("s")) return 'S'; else if (rb_to_id(op) == rb_intern("overwrite") || rb_to_id(op) == rb_intern("o")) return 'O'; else if (rb_to_id(op) == rb_intern("none") || rb_to_id(op) == rb_intern("n")) return 'N'; else rb_raise(rb_eArgError, "Expected :all, :return, :overwrite, :none (or :a, :s, :o, :n, respectively)"); return 'a'; } /* * Interprets lapack jobvl and jobvr arguments, for which LAPACK needs character values N or V. * * Called by lapack_geev -- basically inline. evd stands for eigenvalue decomposition. */ static inline char lapack_evd_job_sym(VALUE op) { if (op == Qfalse || op == Qnil || rb_to_id(op) == rb_intern("n")) return 'N'; else return 'V'; } #endif ================================================ FILE: ext/nmatrix/math.cpp ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - present, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - present, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == math.cpp // // Ruby-exposed CBLAS and LAPACK functions that are available without // an external library. 
// // === Procedure for adding CBLAS functions to math.cpp/math.h: // // This procedure is written as if for a fictional function with double // version dbacon, which we'll say is from CBLAS. // // 1. Write a default templated version which probably returns a boolean. // Call it bacon, and put it in math.h. // // template // bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, DType* A, ...) { // rb_raise(rb_eNotImpError, "only implemented for ATLAS types (float32, float64, complex64, complex128)"); // } // // Make sure this is in namespace nm::math // // 2. In math.cpp, add a templated inline static version of the function which takes // only void* pointers and uses static_cast to convert them to the // proper dtype. This should also be in namespace nm::math // // This function may also need to switch m and n if these arguments are given. // // For an example, see cblas_gemm. This function should do nothing other than cast // appropriately. If cblas_dbacon, cblas_sbacon, cblas_cbacon, and cblas_zbacon // all take void* only, and no other pointers that vary between functions, you can skip // this particular step -- as we can call them directly using a custom function pointer // array (same function signature!). // // This version of the function will be the one exposed through NMatrix::BLAS. We // want it to be as close to the actual BLAS version of the function as possible, // and with as few checks as possible. // // You will probably need a forward declaration in the extern "C" block. // // Note: In that case, the function you wrote in Step 1 should also take exactly the // same arguments as cblas_xbacon. Otherwise Bad Things will happen. // // 3. In cblas_templates_core.h, add a default template like in step 1 (which will just // call nm::math::bacon()) and also // inline specialized versions of bacon for the different BLAS types. 
// This will allow both nmatrix-atlas and nmatrix-lapacke to use the optimized version
// of bacon from whatever external library is available, as well as the internal version
// if an external version is not available. These functions will end up in a namespace
// like nm::math::atlas, but don't explicitly put them in a namespace, they will get
// put in the appropriate namespace when cblas_templates_core.h is included.
//
// template <typename DType>
// inline bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, DType* A, ...) {
//   nm::math::bacon(trans, M, N, A, ...);
// }
//
// template <>
// inline bool bacon(const CBLAS_TRANSPOSE trans, const int M, const int N, float* A, ...) {
//   cblas_sbacon(trans, M, N, A, ...);
//   return true;
// }
//
// Note that you should do everything in your power here to parse any return values
// cblas_sbacon may give you. We're not trying very hard in this example, but you might
// look at getrf to see how it might be done.
//
// 4. Write the C function nm_cblas_bacon, which is what Ruby will call. Use the example
//    of nm_cblas_gemm below. Also you must add a similar function in math_atlas.cpp
//    and math_lapacke.cpp
//
// 5. Expose the function in nm_math_init_blas(), in math.cpp:
//
//    rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_bacon", (METHOD)nm_cblas_bacon, 5);
//
//    Do something similar in math_atlas.cpp and math_lapacke.cpp to add the function
//    to the plugin gems.
//
// Here, we're telling Ruby that nm_cblas_bacon takes five arguments as a Ruby function.
//
// 6. In blas.rb, write a bacon function which accesses cblas_bacon, but does all the
//    sanity checks we left out in step 2.
//
// 7. Write tests for NMatrix::BLAS::bacon, confirming that it works for the ATLAS dtypes.
//
// 8. After you get it working properly with CBLAS, download dbacon.f from NETLIB, and use
//    f2c to convert it to C. Clean it up so it's readable. Remove the extra indices -- f2c
//    inserts a lot of unnecessary stuff.
// // Copy and paste the output into the default templated function you wrote in Step 1. // Fix it so it works as a template instead of just for doubles. // // Because of step 3, this will automatically also work for the nmatrix-atlas // and nmatrix-lapacke implementations. // // 9. Write tests to confirm that it works for all data types. // // 10. See about adding a Ruby-like interface, such as matrix_matrix_multiply for cblas_gemm, // or matrix_vector_multiply for cblas_gemv. This step is not mandatory. // // 11. Pull request! /* * Project Includes */ #include #include #include #include #include "math/cblas_enums.h" #include "data/data.h" #include "math/magnitude.h" #include "math/imax.h" #include "math/scal.h" #include "math/laswp.h" #include "math/trsm.h" #include "math/gemm.h" #include "math/gemv.h" #include "math/asum.h" #include "math/nrm2.h" #include "math/getrf.h" #include "math/getrs.h" #include "math/rot.h" #include "math/rotg.h" #include "math/math.h" #include "math/util.h" #include "storage/dense/dense.h" #include "nmatrix.h" #include "ruby_constants.h" /* * Forward Declarations */ extern "C" { /* BLAS Level 1. */ static VALUE nm_cblas_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx); static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx); static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx); static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s); static VALUE nm_cblas_rotg(VALUE self, VALUE ab); static VALUE nm_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx); /* BLAS Level 2. */ static VALUE nm_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda, VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy); /* BLAS Level 3. 
*/ static VALUE nm_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc); static VALUE nm_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb); /* LAPACK. */ static VALUE nm_has_clapack(VALUE self); static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda); static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb); static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx); } // end of extern "C" block //////////////////// // Math Functions // //////////////////// namespace nm { namespace math { /* * Calculate the determinant for a dense matrix (A [elements]) of size 2 or 3. Return the result. 
*/ template void det_exact_from_dense(const int M, const void* A_elements, const int lda, void* result_arg) { DType* result = reinterpret_cast(result_arg); const DType* A = reinterpret_cast(A_elements); typename LongDType::type x, y; if (M == 2) { *result = A[0] * A[lda+1] - A[1] * A[lda]; } else if (M == 3) { x = A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]; // ei - fh y = A[lda] * A[2*lda+2] - A[lda+2] * A[2*lda]; // fg - di x = A[0]*x - A[1]*y ; // a*(ei-fh) - b*(fg-di) y = A[lda] * A[2*lda+1] - A[lda+1] * A[2*lda]; // dh - eg *result = A[2]*y + x; // c*(dh-eg) + _ } else if (M < 2) { rb_raise(rb_eArgError, "can only calculate exact determinant of a square matrix of size 2 or larger"); } else { rb_raise(rb_eNotImpError, "exact determinant calculation needed for matrices larger than 3x3"); } } //we can't do det_exact on byte, because it will want to return a byte (unsigned), but determinants can be negative, even if all elements of the matrix are positive template <> void det_exact_from_dense(const int M, const void* A_elements, const int lda, void* result_arg) { rb_raise(nm_eDataTypeError, "cannot call det_exact on unsigned type"); } /* * Calculate the determinant for a yale matrix (storage) of size 2 or 3. Return the result. 
 */
template void det_exact_from_yale(const int M, const YALE_STORAGE* storage, const int lda, void* result_arg) {
  DType* result = reinterpret_cast(result_arg);
  IType* ija = reinterpret_cast(storage->ija);
  DType* a = reinterpret_cast(storage->a);
  // In Yale storage the first shape[0]+1 slots of A hold the diagonal (plus one
  // unused slot); stored off-diagonal entries begin at index shape[0]+1.
  IType col_pos = storage->shape[0] + 1;
  if (M == 2) {
    if (ija[2] - ija[0] == 2) {
      // Both off-diagonal entries stored: ad - bc.
      *result = a[0] * a[1] - a[col_pos] * a[col_pos+1];
    } else {
      // At most one off-diagonal entry, so the bc term is zero.
      *result = a[0] * a[1];
    }
  } else if (M == 3) {
    // Densify the 3x3 into m, then expand the determinant directly.
    DType m[3][3];
    for (int i = 0; i < 3; ++i) {
      m[i][i] = a[i]; // stored diagonal
      switch(ija[i+1] - ija[i]) { // number of off-diagonal entries in row i
      case 2:
        m[i][ija[col_pos]] = a[col_pos];
        m[i][ija[col_pos+1]] = a[col_pos+1];
        col_pos += 2;
        break;
      case 1:
        // Zero both off-diagonal cells first, then fill the stored one.
        m[i][(i+1)%3] = m[i][(i+2)%3] = 0;
        m[i][ija[col_pos]] = a[col_pos];
        ++col_pos;
        break;
      case 0:
        m[i][(i+1)%3] = m[i][(i+2)%3] = 0;
        break;
      default:
        rb_raise(rb_eArgError, "some value in IJA is incorrect!");
      }
    }
    // Rule of Sarrus on the densified matrix.
    *result = m[0][0] * m[1][1] * m[2][2] + m[0][1] * m[1][2] * m[2][0] + m[0][2] * m[1][0] * m[2][1]
            - m[0][0] * m[1][2] * m[2][1] - m[0][1] * m[1][0] * m[2][2] - m[0][2] * m[1][1] * m[2][0];
  } else if (M < 2) {
    rb_raise(rb_eArgError, "can only calculate exact determinant of a square matrix of size 2 or larger");
  } else {
    rb_raise(rb_eNotImpError, "exact determinant calculation needed for matrices larger than 3x3");
  }
}

/*
 * Solve a system of linear equations using forward-substitution followed by
 * back substitution from the LU factorization of the matrix of co-efficients.
 * Replaces x_elements with the result. Works only with non-integer, non-object
 * data types.
 *
 * args - r           -> The number of rows of the matrix.
 *        lu_elements -> Elements of the LU decomposition of the co-efficients
 *                       matrix, as a contiguous array.
 *        b_elements  -> Elements of the right hand sides, as a contiguous array.
 *        x_elements  -> The array that will contain the results of the computation.
 *        pivot       -> Positions of permuted rows.
 */
template void solve(const int r, const void* lu_elements, const void* b_elements, void* x_elements, const int* pivot) {
  int ii = 0, ip;
  DType sum;

  const DType* matrix = reinterpret_cast(lu_elements);
  const DType* b = reinterpret_cast(b_elements);
  DType* x = reinterpret_cast(x_elements);

  // Start from a copy of the right-hand side; the solution is built in x.
  for (int i = 0; i < r; ++i) { x[i] = b[i]; }

  for (int i = 0; i < r; ++i) { // forward substitution loop
    ip = pivot[i];
    sum = x[ip];
    x[ip] = x[i]; // undo the row permutation as we go
    if (ii != 0) {
      for (int j = ii - 1;j < i; ++j) { sum = sum - matrix[i * r + j] * x[j]; }
    } else if (sum != 0.0) {
      // First non-zero RHS element found; lets us skip multiplies over
      // leading zeros in b on later iterations.
      ii = i + 1;
    }
    x[i] = sum;
  }

  for (int i = r - 1; i >= 0; --i) { // back substitution loop
    sum = x[i];
    for (int j = i + 1; j < r; j++) { sum = sum - matrix[i * r + j] * x[j]; }
    x[i] = sum/matrix[i * r + i]; // divide by the diagonal of U
  }
}

/*
 * Calculates in-place inverse of A_elements. Uses Gauss-Jordan elimination technique.
 * In-place inversion of the matrix saves on memory and time.
 *
 * args - M          - Shape of the matrix
 *        a_elements - A duplicate of the original expressed as a contiguous array
 */
template void inverse(const int M, void* a_elements) {
  DType* matrix = reinterpret_cast(a_elements);

  int row_index[M]; // arrays for keeping track of column scrambling
  int col_index[M];

  for (int k = 0;k < M; ++k) {
    // Partial pivoting: find the largest-magnitude element in column k at or
    // below the diagonal.
    typename MagnitudeDType::type akk;
    akk = magnitude( matrix[k * (M + 1)] ); // diagonal element

    int interchange = k;

    for (int row = k + 1; row < M; ++row) {
      typename MagnitudeDType::type big;
      big = magnitude( matrix[M*row + k] ); // element below the temp pivot

      if ( big > akk ) {
        interchange = row;
        akk = big;
      }
    }

    if (interchange != k) { // check if rows need flipping
      DType temp;

      for (int col = 0; col < M; ++col) {
        NM_SWAP(matrix[interchange*M + col], matrix[k*M + col], temp);
      }
    }

    row_index[k] = interchange;
    col_index[k] = k;

    if (matrix[k * (M + 1)] == (DType)(0)) {
      rb_raise(rb_eZeroDivError, "Expected Non-Singular Matrix.");
    }

    DType pivot = matrix[k * (M + 1)];
    matrix[k * (M + 1)] = (DType)(1); // set diagonal as 1 for in-place inversion

    for (int col =
0; col < M; ++col) { // divide each element in the kth row with the pivot
      matrix[k*M + col] = matrix[k*M + col] / pivot;
    }

    for (int kk = 0; kk < M; ++kk) { // iterate and reduce all rows
      if (kk == k) continue;

      DType dum = matrix[k + M*kk];
      matrix[k + M*kk] = (DType)(0); // prepare for inplace inversion
      for (int col = 0; col < M; ++col) {
        matrix[M*kk + col] = matrix[M*kk + col] - matrix[M*k + col] * dum;
      }
    }
  }

  // Unscramble columns
  DType temp;

  for (int k = M - 1; k >= 0; --k) {
    if (row_index[k] != col_index[k]) {
      for (int row = 0; row < M; ++row) {
        NM_SWAP(matrix[row * M + row_index[k]], matrix[row * M + col_index[k]], temp);
      }
    }
  }
}

/*
 * Reduce a square matrix to hessenberg form with householder transforms
 *
 * == Arguments
 *
 * nrows - The number of rows present in matrix a.
 * a_elements - Elements of the matrix to be reduced in 1D array form.
 *
 * == References
 *
 * http://www.mymathlib.com/c_source/matrices/eigen/hessenberg_orthog.c
 * This code has been included by permission of the author.
 */
template void hessenberg(const int nrows, void* a_elements) {
  DType* a = reinterpret_cast(a_elements);
  DType* u = new DType[nrows]; // auxiliary storage for the chosen vector
  DType sum_of_squares, *p_row, *psubdiag, *p_a, scale, innerproduct;
  int i, k, col;

  // For each column use a Householder transformation to zero all entries
  // below the subdiagonal.
  for (psubdiag = a + nrows, col = 0; col < nrows - 2; psubdiag += nrows + 1, col++) {
    // Calculate the signed square root of the sum of squares of the
    // elements below the diagonal.
    for (p_a = psubdiag, sum_of_squares = 0.0, i = col + 1; i < nrows; p_a += nrows, i++) {
      sum_of_squares += *p_a * *p_a;
    }
    if (sum_of_squares == 0.0) { continue; } // column already zero below subdiagonal
    sum_of_squares = std::sqrt(sum_of_squares);

    // Choose the sign opposite the subdiagonal element to avoid cancellation.
    if ( *psubdiag >= 0.0 ) { sum_of_squares = -sum_of_squares; }

    // Calculate the Householder transformation Q = I - 2uu'/u'u.
    u[col + 1] = *psubdiag - sum_of_squares;
    *psubdiag = sum_of_squares;

    for (p_a = psubdiag + nrows, i = col + 2; i < nrows; p_a += nrows, i++) {
      u[i] = *p_a;
      *p_a = 0.0;
    }

    // Premultiply A by Q
    scale = -1.0 / (sum_of_squares * u[col+1]);

    for (p_row = psubdiag - col, i = col + 1; i < nrows; i++) {
      p_a = a + nrows * (col + 1) + i;
      for (innerproduct = 0.0, k = col + 1; k < nrows; p_a += nrows, k++) {
        innerproduct += u[k] * *p_a;
      }
      innerproduct *= scale;
      for (p_a = p_row + i, k = col + 1; k < nrows; p_a += nrows, k++) {
        *p_a -= u[k] * innerproduct;
      }
    }

    // Postmultiply QA by Q
    for (p_row = a, i = 0; i < nrows; p_row += nrows, i++) {
      for (innerproduct = 0.0, k = col + 1; k < nrows; k++) {
        innerproduct += u[k] * *(p_row + k);
      }
      innerproduct *= scale;
      for (k = col + 1; k < nrows; k++) {
        *(p_row + k) -= u[k] * innerproduct;
      }
    }
  }

  delete[] u;
}

// Shared raise helper for the exact-inverse routines below.
void raise_not_invertible_error() {
  rb_raise(nm_eNotInvertibleError,
      "matrix must have non-zero determinant to be invertible (not getting this error does not mean matrix is invertible if you're dealing with floating points)");
}

/*
 * Calculate the exact inverse for a dense matrix (A [elements]) of size 2 or 3. Places the result in B_elements.
 */
template void inverse_exact_from_dense(const int M, const void* A_elements,
    const int lda, void* B_elements, const int ldb) {

  const DType* A = reinterpret_cast(A_elements);
  DType* B = reinterpret_cast(B_elements);

  if (M == 2) {
    // Adjugate over determinant, written out directly.
    DType det = A[0] * A[lda+1] - A[1] * A[lda];
    if (det == 0) { raise_not_invertible_error(); }
    B[0] = A[lda+1] / det;
    B[1] = -A[1] / det;
    B[ldb] = -A[lda] / det;
    B[ldb+1] = A[0] / det;

  } else if (M == 3) {
    // Calculate the exact determinant.
DType det;
    det_exact_from_dense(M, A_elements, lda, reinterpret_cast(&det));
    if (det == 0) { raise_not_invertible_error(); }

    // Adjugate / det, using the standard cofactor labels for [a b c; d e f; g h i].
    B[0] = ( A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]) / det; // A = ei - fh
    B[1] = (- A[1] * A[2*lda+2] + A[2] * A[2*lda+1]) / det; // D = -bi + ch
    B[2] = ( A[1] * A[lda+2] - A[2] * A[lda+1]) / det; // G = bf - ce
    B[ldb] = (- A[lda] * A[2*lda+2] + A[lda+2] * A[2*lda]) / det; // B = -di + fg
    B[ldb+1] = ( A[0] * A[2*lda+2] - A[2] * A[2*lda]) / det; // E = ai - cg
    B[ldb+2] = (- A[0] * A[lda+2] + A[2] * A[lda]) / det; // H = -af + cd
    B[2*ldb] = ( A[lda] * A[2*lda+1] - A[lda+1] * A[2*lda]) / det; // C = dh - eg
    B[2*ldb+1]= ( -A[0] * A[2*lda+1] + A[1] * A[2*lda]) / det; // F = -ah + bg
    B[2*ldb+2]= ( A[0] * A[lda+1] - A[1] * A[lda]) / det; // I = ae - bd
  } else if (M == 1) {
    B[0] = 1 / A[0];
  } else {
    rb_raise(rb_eNotImpError, "exact inverse calculation needed for matrices larger than 3x3");
  }
}

/*
 * Calculate the exact inverse of a Yale-storage matrix of size 1, 2 or 3.
 * +inverse+ must arrive as a clone of +storage+; its a/ija arrays are
 * overwritten (and reallocated for the 3x3 case).
 */
template void inverse_exact_from_yale(const int M, const YALE_STORAGE* storage,
    const int lda, YALE_STORAGE* inverse, const int ldb) {

  // inverse is a clone of storage
  const DType* a = reinterpret_cast(storage->a);
  const IType* ija = reinterpret_cast(storage->ija);
  DType* b = reinterpret_cast(inverse->a);
  IType* ijb = reinterpret_cast(inverse->ija);
  // Off-diagonal entries begin right after the shape[0]+1 diagonal/IJA slots.
  IType col_pos = storage->shape[0] + 1;
  // Calculate the exact determinant.
  DType det;
  if (M == 2) {
    IType ndnz = ija[2] - ija[0]; // number of stored off-diagonal entries
    if (ndnz == 2) {
      det = a[0] * a[1] - a[col_pos] * a[col_pos+1];
    } else {
      det = a[0] * a[1];
    }
    if (det == 0) { raise_not_invertible_error(); }

    // Inverse of [a b; c d] is [d -b; -c a]/det; sparsity pattern is unchanged.
    b[0] = a[1] / det;
    b[1] = a[0] / det;
    if (ndnz == 2) {
      b[col_pos] = -a[col_pos] / det;
      b[col_pos+1] = -a[col_pos+1] / det;
    } else if (ndnz == 1) {
      b[col_pos] = -a[col_pos] / det;
    }

  } else if (M == 3) {
    // Densify into a temporary 3x3 A, invert exactly, then re-sparsify.
    DType *A = new DType[lda*3];
    for (int i = 0; i < lda; ++i) {
      A[i*3+i] = a[i]; // stored diagonal
      switch (ija[i+1] - ija[i]) { // off-diagonal count in row i
        case 2:
          A[i*3 + ija[col_pos]] = a[col_pos];
          A[i*3 + ija[col_pos+1]] = a[col_pos+1];
          col_pos += 2;
          break;
        case 1:
          A[i*3 + (i+1)%3] = A[i*3 + (i+2)%3] = 0;
          A[i*3 + ija[col_pos]] = a[col_pos];
          col_pos += 1;
          break;
        case 0:
          A[i*3 + (i+1)%3] = A[i*3 + (i+2)%3] = 0;
          break;
        default:
          rb_raise(rb_eArgError, "some value in IJA is incorrect!");
      }
    }
    // Rule of Sarrus.
    det = A[0] * A[lda+1] * A[2*lda+2] + A[1] * A[lda+2] * A[2*lda] + A[2] * A[lda] * A[2*lda+1]
        - A[0] * A[lda+2] * A[2*lda+1] - A[1] * A[lda] * A[2*lda+2] - A[2] * A[lda+1] * A[2*lda];
    if (det == 0) { raise_not_invertible_error(); }

    // Dense inverse via adjugate / det.
    DType *B = new DType[3*ldb];
    B[0] = ( A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]) / det; // A = ei - fh
    B[1] = (- A[1] * A[2*lda+2] + A[2] * A[2*lda+1]) / det; // D = -bi + ch
    B[2] = ( A[1] * A[lda+2] - A[2] * A[lda+1]) / det; // G = bf - ce
    B[ldb] = (- A[lda] * A[2*lda+2] + A[lda+2] * A[2*lda]) / det; // B = -di + fg
    B[ldb+1] = ( A[0] * A[2*lda+2] - A[2] * A[2*lda]) / det; // E = ai - cg
    B[ldb+2] = (- A[0] * A[lda+2] + A[2] * A[lda]) / det; // H = -af + cd
    B[2*ldb] = ( A[lda] * A[2*lda+1] - A[lda+1] * A[2*lda]) / det; // C = dh - eg
    B[2*ldb+1]= ( -A[0] * A[2*lda+1] + A[1] * A[2*lda]) / det; // F = -ah + bg
    B[2*ldb+2]= ( A[0] * A[lda+1] - A[1] * A[lda]) / det; // I = ae - bd

    // Calculate the size of ijb and b, then reallocate them.
    IType ndnz = 0;
    for (int i = 0; i < 3; ++i) {
      for (int j = 0; j < 3; ++j) {
        if (j != i && B[i*ldb + j] != 0) { ++ndnz; } // count non-zero off-diagonals
      }
    }
    inverse->ndnz = ndnz;
    col_pos = 4; // shape[0] + 1
    inverse->capacity = 4 + ndnz;
    NM_REALLOC_N(inverse->a, DType, 4 + ndnz);
    NM_REALLOC_N(inverse->ija, IType, 4 + ndnz);
    // Realloc may have moved the buffers; refresh the local pointers.
    b = reinterpret_cast(inverse->a);
    ijb = reinterpret_cast(inverse->ija);

    // Write the dense inverse back into Yale form.
    for (int i = 0; i < 3; ++i) {
      ijb[i] = col_pos; // row i's off-diagonals start here
      for (int j = 0; j < 3; ++j) {
        if (j == i) {
          b[i] = B[i*ldb + j]; // diagonal slot
        } else if (B[i*ldb + j] != 0) {
          b[col_pos] = B[i*ldb + j];
          ijb[col_pos] = j;
          ++col_pos;
        }
      }
    }
    b[3] = 0;          // unused slot between diagonal and off-diagonal storage
    ijb[3] = col_pos;  // terminator: one past the last stored entry
    delete [] B;
    delete [] A;

  } else if (M == 1) {
    b[0] = 1 / a[0];
  } else {
    rb_raise(rb_eNotImpError, "exact inverse calculation needed for matrices larger than 3x3");
  }
}

/*
 * Function signature conversion for calling CBLAS' gemm functions as directly as possible.
 *
 * For documentation: http://www.netlib.org/blas/dgemm.f
 */
template inline static void cblas_gemm(const enum CBLAS_ORDER order,
                                       const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b,
                                       int m, int n, int k,
                                       void* alpha,
                                       void* a, int lda,
                                       void* b, int ldb,
                                       void* beta,
                                       void* c, int ldc)
{
  // Pure cast-and-forward; all checking happens in gemm / the Ruby layer.
  gemm(order, trans_a, trans_b, m, n, k, reinterpret_cast(alpha),
       reinterpret_cast(a), lda,
       reinterpret_cast(b), ldb, reinterpret_cast(beta),
       reinterpret_cast(c), ldc);
}

/*
 * Function signature conversion for calling CBLAS's gemv functions as directly as possible.
 *
 * For documentation: http://www.netlib.org/blas/dgemv.f
 */
template inline static bool cblas_gemv(const enum CBLAS_TRANSPOSE trans,
                                       const int m, const int n,
                                       const void* alpha,
                                       const void* a, const int lda,
                                       const void* x, const int incx,
                                       const void* beta,
                                       void* y, const int incy)
{
  // Pure cast-and-forward; all checking happens in gemv / the Ruby layer.
  return gemv(trans, m, n, reinterpret_cast(alpha), reinterpret_cast(a), lda,
              reinterpret_cast(x), incx, reinterpret_cast(beta), reinterpret_cast(y), incy);
}

/*
 * Function signature conversion for calling CBLAS' trsm functions as directly as possible.
*
 * For documentation: http://www.netlib.org/blas/dtrsm.f
 */
template inline static void cblas_trsm(const enum CBLAS_ORDER order,
                                       const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
                                       const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_DIAG diag,
                                       const int m, const int n,
                                       const void* alpha,
                                       const void* a, const int lda,
                                       void* b, const int ldb)
{
  // Note: alpha is dereferenced here because trsm takes it by value.
  trsm(order, side, uplo, trans_a, diag, m, n, *reinterpret_cast(alpha),
       reinterpret_cast(a), lda, reinterpret_cast(b), ldb);
}

}} // end of namespace nm::math


extern "C" {

///////////////////
// Ruby Bindings //
///////////////////

// Registers the internal (no-external-library) BLAS/LAPACK singleton methods
// under NMatrix::Internal::BLAS and NMatrix::Internal::LAPACK.
void nm_math_init_blas() {
  VALUE cNMatrix_Internal = rb_define_module_under(cNMatrix, "Internal");

  rb_define_singleton_method(cNMatrix, "has_clapack?", (METHOD)nm_has_clapack, 0);

  VALUE cNMatrix_Internal_LAPACK = rb_define_module_under(cNMatrix_Internal, "LAPACK");

  /* ATLAS-CLAPACK Functions that are implemented internally */
  rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_getrf", (METHOD)nm_clapack_getrf, 5);
  rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_getrs", (METHOD)nm_clapack_getrs, 9);
  rb_define_singleton_method(cNMatrix_Internal_LAPACK, "clapack_laswp", (METHOD)nm_clapack_laswp, 7);

  VALUE cNMatrix_Internal_BLAS = rb_define_module_under(cNMatrix_Internal, "BLAS");

  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_scal", (METHOD)nm_cblas_scal, 4);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_nrm2", (METHOD)nm_cblas_nrm2, 3);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_asum", (METHOD)nm_cblas_asum, 3);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_rot", (METHOD)nm_cblas_rot, 7);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_rotg", (METHOD)nm_cblas_rotg, 1);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_imax", (METHOD)nm_cblas_imax, 3);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
  rb_define_singleton_method(cNMatrix_Internal_BLAS, "cblas_trsm", (METHOD)nm_cblas_trsm, 12);
}

/*
 * call-seq:
 *     NMatrix::BLAS.cblas_scal(n, alpha, vector, inc) -> NMatrix
 *
 * BLAS level 1 function +scal+. Works with all dtypes.
 *
 * Scale +vector+ in-place by +alpha+ and also return it. The operation is as
 * follows:
 *  x <- alpha * x
 *
 * - +n+ -> Number of elements of +vector+.
 * - +alpha+ -> Scalar value used in the operation.
 * - +vector+ -> NMatrix of shape [n,1] or [1,n]. Modified in-place.
 * - +inc+ -> Increment used in the scaling function. Should generally be 1.
 */
static VALUE nm_cblas_scal(VALUE self, VALUE n, VALUE alpha, VALUE vector, VALUE incx) {
  nm::dtype_t dtype = NM_DTYPE(vector);

  // Convert the Ruby scalar to the vector's dtype before dispatching.
  void* scalar = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
  rubyval_to_cval(alpha, dtype, scalar);

  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_scal, void, const int n,
                             const void* scalar, void* x, const int incx);

  ttable[dtype](FIX2INT(n), scalar, NM_STORAGE_DENSE(vector)->elements, FIX2INT(incx));

  return vector;
}

/*
 * Call any of the cblas_xrotg functions as directly as possible.
 *
 * xROTG computes the elements of a Givens plane rotation matrix such that:
 *
 *  |  c s |   | a |   | r |
 *  | -s c | * | b | = | 0 |
 *
 * where r = +- sqrt( a**2 + b**2 ) and c**2 + s**2 = 1.
 *
 * The Givens plane rotation can be used to introduce zero elements into a matrix selectively.
 *
 * This function differs from most of the other raw BLAS accessors. Instead of
 * providing a, b, c, s as arguments, you should only provide a and b (the
 * inputs), and you should provide them as the first two elements of any dense
 * NMatrix type.
 *
 * The outputs [c,s] will be returned in a Ruby Array at the end; the input
 * NMatrix will also be modified in-place.
 *
 * This function, like the other cblas_ functions, does minimal type-checking.
 */
static VALUE nm_cblas_rotg(VALUE self, VALUE ab) {
  // Dispatch table indexed by dtype; NULL entries are unsupported dtypes.
  static void (*ttable[nm::NUM_DTYPES])(void* a, void* b, void* c, void* s) = {
      NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
      nm::math::cblas_rotg,
      nm::math::cblas_rotg,
      nm::math::cblas_rotg,
      nm::math::cblas_rotg,
      NULL //nm::math::cblas_rotg
  };

  nm::dtype_t dtype = NM_DTYPE(ab);

  if (!ttable[dtype]) {
    rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
    return Qnil; // unreachable; rb_raise does not return
  } else {
    // Protect VALUEs from the conservative GC while we hold raw pointers.
    NM_CONSERVATIVE(nm_register_value(&self));
    NM_CONSERVATIVE(nm_register_value(&ab));
    void *pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
         *pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);

    // extract A and B from the NVector (first two elements)
    void* pA = NM_STORAGE_DENSE(ab)->elements;
    void* pB = (char*)(NM_STORAGE_DENSE(ab)->elements) + DTYPE_SIZES[dtype];
    // c and s are output

    ttable[dtype](pA, pB, pC, pS);

    VALUE result = rb_ary_new2(2);

    if (dtype == nm::RUBYOBJ) {
      // Already Ruby objects; store directly.
      rb_ary_store(result, 0, *reinterpret_cast(pC));
      rb_ary_store(result, 1, *reinterpret_cast(pS));
    } else {
      // Box the C values as Ruby objects.
      rb_ary_store(result, 0, nm::rubyobj_from_cval(pC, dtype).rval);
      rb_ary_store(result, 1, nm::rubyobj_from_cval(pS, dtype).rval);
    }
    NM_CONSERVATIVE(nm_unregister_value(&ab));
    NM_CONSERVATIVE(nm_unregister_value(&self));
    return result;
  }
}

/*
 * Call any of the cblas_xrot functions as directly as possible.
 *
 * xROT is a BLAS level 1 routine (taking two vectors) which applies a plane rotation.
 *
 * It's tough to find documentation on xROT. Here are what we think the arguments are for:
 *  * n     :: number of elements to consider in x and y
 *  * x     :: a vector (expects an NVector)
 *  * incx  :: stride of x
 *  * y     :: a vector (expects an NVector)
 *  * incy  :: stride of y
 *  * c     :: cosine of the angle of rotation
 *  * s     :: sine of the angle of rotation
 *
 * Note that c and s will be the same dtype as x and y, except when x and y are complex.
If x and y are complex, c and s
 * will be float for Complex64 or double for Complex128.
 *
 * You probably don't want to call this function. Instead, why don't you try rot, which is more flexible
 * with its arguments?
 *
 * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
 * handling, so you can easily crash Ruby!
 */
static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s) {
  // NOTE(review): the explicit template arguments on these table entries
  // (e.g. <float32_t,float32_t>) appear to have been stripped during
  // extraction; restore them before compiling.
  static void (*ttable[nm::NUM_DTYPES])(const int N, void*, const int, void*, const int, const void*, const void*) = {
      NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
      nm::math::cblas_rot, nm::math::cblas_rot, nm::math::cblas_rot, nm::math::cblas_rot, nm::math::cblas_rot
  };

  nm::dtype_t dtype = NM_DTYPE(x);

  if (!ttable[dtype]) {
    rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
    return Qfalse;
  } else {
    void *pC, *pS;

    // We need to ensure the cosine and sine arguments are the correct dtype -- which may differ from the actual dtype.
    if (dtype == nm::COMPLEX64) {
      pC = NM_ALLOCA_N(float,1);
      pS = NM_ALLOCA_N(float,1);
      rubyval_to_cval(c, nm::FLOAT32, pC);
      rubyval_to_cval(s, nm::FLOAT32, pS);
    } else if (dtype == nm::COMPLEX128) {
      pC = NM_ALLOCA_N(double,1);
      pS = NM_ALLOCA_N(double,1);
      rubyval_to_cval(c, nm::FLOAT64, pC);
      rubyval_to_cval(s, nm::FLOAT64, pS);
    } else {
      pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
      pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
      rubyval_to_cval(c, dtype, pC);
      rubyval_to_cval(s, dtype, pS);
    }

    // Rotate x and y in-place.
    ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), NM_STORAGE_DENSE(y)->elements, FIX2INT(incy), pC, pS);

    return Qtrue;
  }
}

/*
 * Call any of the cblas_xnrm2 functions as directly as possible.
 *
 * xNRM2 is a BLAS level 1 routine which calculates the 2-norm of an n-vector x.
 *
 * Arguments:
 *  * n     :: length of x, must be at least 0
 *  * x     :: pointer to first entry of input vector
 *  * incx  :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
 *
 * You probably don't want to call this function. Instead, why don't you try nrm2, which is more flexible
 * with its arguments?
 *
 * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
 * handling, so you can easily crash Ruby!
 */
static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx) {
  // NOTE(review): the explicit template arguments on these table entries
  // appear to have been stripped during extraction; restore before compiling.
  static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
      NULL, NULL, NULL, NULL, NULL, // no help for integers
      nm::math::cblas_nrm2, nm::math::cblas_nrm2, nm::math::cblas_nrm2, nm::math::cblas_nrm2, nm::math::cblas_nrm2
  };

  nm::dtype_t dtype = NM_DTYPE(x);

  if (!ttable[dtype]) {
    rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
    return Qnil;
  } else {
    // Determine the return dtype and allocate it. The 2-norm of a complex
    // vector is real, so complex dtypes return their real counterpart.
    nm::dtype_t rdtype = dtype;
    if      (dtype == nm::COMPLEX64)  rdtype = nm::FLOAT32;
    else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;

    void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);

    ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);

    return nm::rubyobj_from_cval(Result, rdtype).rval;
  }
}

/*
 * Call any of the cblas_xasum functions as directly as possible.
 *
 * xASUM is a BLAS level 1 routine which calculates the sum of absolute values of the entries
 * of a vector x.
 *
 * Arguments:
 *  * n     :: length of x, must be at least 0
 *  * x     :: pointer to first entry of input vector
 *  * incx  :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
 *
 * You probably don't want to call this function. Instead, why don't you try asum, which is more flexible
 * with its arguments?
 *
 * This function does almost no type checking. Seriously, be really careful when you call it!
There's no exception
 * handling, so you can easily crash Ruby!
 */
static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx) {
  // All ten dtypes are supported (absolute-value sums are well-defined for
  // integers too).
  // NOTE(review): the explicit template arguments on these table entries
  // appear to have been stripped during extraction; restore before compiling.
  static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
      nm::math::cblas_asum, nm::math::cblas_asum, nm::math::cblas_asum, nm::math::cblas_asum, nm::math::cblas_asum,
      nm::math::cblas_asum, nm::math::cblas_asum, nm::math::cblas_asum, nm::math::cblas_asum, nm::math::cblas_asum
  };

  nm::dtype_t dtype = NM_DTYPE(x);

  // Determine the return dtype and allocate it. The sum of absolute values of
  // a complex vector is real, so complex dtypes return their real counterpart.
  nm::dtype_t rdtype = dtype;
  if      (dtype == nm::COMPLEX64)  rdtype = nm::FLOAT32;
  else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;

  void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);

  ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);

  return nm::rubyobj_from_cval(Result, rdtype).rval;
}

/*
 * call-seq:
 *     NMatrix::BLAS.cblas_imax(n, vector, inc) -> Fixnum
 *
 * BLAS level 1 routine.
 *
 * Return the index of the largest element of +vector+.
 *
 * - +n+ -> Vector's size. Generally, you can use NMatrix#rows or NMatrix#cols.
 * - +vector+ -> A NMatrix of shape [n,1] or [1,n] with any dtype.
 * - +inc+ -> It's the increment used when searching. Use 1 except if you know
 *   what you're doing.
 */
static VALUE nm_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx) {
  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_imax, int, const int n, const void* x, const int incx);

  nm::dtype_t dtype = NM_DTYPE(x);

  int index = ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx));

  // Convert to Ruby's Int value.
  return INT2FIX(index);
}

/* Call any of the cblas_xgemm functions as directly as possible.
 *
 * The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation:
 *
 *    C = alpha*op(A)*op(B) + beta*C
 *
 * where op(X) is one of op(X) = X, op(X) = X**T, or the complex conjugate of X.
 *
 * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
 * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
 * expose the ultra-optimized ATLAS versions.
 *
 * == Arguments
 * See: http://www.netlib.org/blas/dgemm.f
 *
 * You probably don't want to call this function. Instead, why don't you try gemm, which is more flexible
 * with its arguments?
 *
 * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
 * handling, so you can easily crash Ruby!
 */
static VALUE nm_cblas_gemm(VALUE self,
                           VALUE order,
                           VALUE trans_a, VALUE trans_b,
                           VALUE m, VALUE n, VALUE k,
                           VALUE alpha,
                           VALUE a, VALUE lda,
                           VALUE b, VALUE ldb,
                           VALUE beta,
                           VALUE c, VALUE ldc)
{
  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemm, void, const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, int m, int n, int k, void* alpha, void* a, int lda, void* b, int ldb, void* beta, void* c, int ldc);

  // The dtype of A selects the dispatch entry; alpha and beta must be
  // converted to that same dtype since they are passed by pointer.
  nm::dtype_t dtype = NM_DTYPE(a);

  void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
       *pBeta  = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
  rubyval_to_cval(alpha, dtype, pAlpha);
  rubyval_to_cval(beta, dtype, pBeta);

  ttable[dtype](blas_order_sym(order), blas_transpose_sym(trans_a), blas_transpose_sym(trans_b), FIX2INT(m), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));

  return c; // result matrix, modified in-place
}

/* Call any of the cblas_xgemv functions as directly as possible.
 *
 * The cblas_xgemv functions (dgemv, sgemv, cgemv, and zgemv) define the following operation:
 *
 *    y = alpha*op(A)*x + beta*y
 *
 * where op(A) is one of op(A) = A, op(A) = A**T, or the complex conjugate of A.
 *
 * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
 * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
 * expose the ultra-optimized ATLAS versions.
 *
 * == Arguments
 * See: http://www.netlib.org/blas/dgemm.f
 *
 * You probably don't want to call this function. Instead, why don't you try cblas_gemv, which is more flexible
 * with its arguments?
 *
 * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
 * handling, so you can easily crash Ruby!
 */
static VALUE nm_cblas_gemv(VALUE self,
                           VALUE trans_a,
                           VALUE m, VALUE n,
                           VALUE alpha,
                           VALUE a, VALUE lda,
                           VALUE x, VALUE incx,
                           VALUE beta,
                           VALUE y, VALUE incy)
{
  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemv, bool, const enum CBLAS_TRANSPOSE, const int, const int, const void*, const void*, const int, const void*, const int, const void*, void*, const int)

  nm::dtype_t dtype = NM_DTYPE(a);

  // alpha and beta are passed by pointer, so convert them to A's dtype first.
  void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
       *pBeta  = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
  rubyval_to_cval(alpha, dtype, pAlpha);
  rubyval_to_cval(beta, dtype, pBeta);

  return ttable[dtype](blas_transpose_sym(trans_a), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), pBeta, NM_STORAGE_DENSE(y)->elements, FIX2INT(incy)) ? Qtrue : Qfalse;
}

static VALUE nm_cblas_trsm(VALUE self,
                           VALUE order,
                           VALUE side, VALUE uplo,
                           VALUE trans_a, VALUE diag,
                           VALUE m, VALUE n,
                           VALUE alpha,
                           VALUE a, VALUE lda,
                           VALUE b, VALUE ldb)
{
  // NOTE(review): the explicit template arguments on these table entries
  // appear to have been stripped during extraction; restore before compiling.
  static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_SIDE, const enum CBLAS_UPLO,
                                        const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
                                        const int m, const int n, const void* alpha, const void* a,
                                        const int lda, void* b, const int ldb) = {
      NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
      nm::math::cblas_trsm, nm::math::cblas_trsm, nm::math::cblas_trsm, nm::math::cblas_trsm, nm::math::cblas_trsm
  };

  nm::dtype_t dtype = NM_DTYPE(a);

  if (!ttable[dtype]) {
    rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
  } else {
    void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
    rubyval_to_cval(alpha, dtype, pAlpha);

    // Solves the triangular system in-place in b.
    ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
  }

  return Qtrue;
}

/* Call any of the clapack_xgetrf functions as directly as possible.
 *
 * The clapack_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N
 * matrix A using partial pivoting with row interchanges.
 *
 * The factorization has the form:
 *    A = P * L * U
 * where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n),
 * and U is upper triangular (upper trapezoidal if m < n).
 *
 * This is the right-looking level 3 BLAS version of the algorithm.
 *
 * == Arguments
 * See: http://www.netlib.org/lapack/double/dgetrf.f
 * (You don't need argument 5; this is the value returned by this function.)
 *
 * You probably don't want to call this function. Instead, why don't you try clapack_getrf, which is more flexible
 * with its arguments?
 *
 * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
 * handling, so you can easily crash Ruby!
 *
 * Returns an array giving the pivot indices (normally these are argument #5).
 */
static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda) {
  // NOTE(review): the explicit template arguments on these table entries
  // appear to have been stripped during extraction; restore before compiling.
  static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int m, const int n, void* a, const int lda, int* ipiv) = {
      NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
      nm::math::clapack_getrf, nm::math::clapack_getrf, nm::math::clapack_getrf, nm::math::clapack_getrf, nm::math::clapack_getrf
  };

  int M = FIX2INT(m),
      N = FIX2INT(n);

  // Allocate the pivot index array, which is of size MIN(M, N).
  size_t ipiv_size = std::min(M,N);
  int* ipiv = NM_ALLOCA_N(int, ipiv_size);

  if (!ttable[NM_DTYPE(a)]) {
    rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
  } else {
    // Call either our version of getrf or the LAPACK version. The matrix a is
    // factorized in-place.
    ttable[NM_DTYPE(a)](blas_order_sym(order), M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv);
  }

  // Result will be stored in a. We return ipiv as an array.
  VALUE ipiv_array = rb_ary_new2(ipiv_size);
  for (size_t i = 0; i < ipiv_size; ++i) {
    rb_ary_store(ipiv_array, i, INT2FIX(ipiv[i]));
  }

  return ipiv_array;
}

/*
 * Call any of the clapack_xgetrs functions as directly as possible.
 */
static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb) {
  // NOTE(review): the explicit template arguments on these table entries
  // appear to have been stripped during extraction; restore before compiling.
  static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans, const int N,
                                       const int NRHS, const void* A, const int lda, const int* ipiv, void* B,
                                       const int ldb) = {
      NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
      nm::math::clapack_getrs, nm::math::clapack_getrs, nm::math::clapack_getrs, nm::math::clapack_getrs, nm::math::clapack_getrs
  };

  // Allocate the C version of the pivot index array
  // (rb_raise longjmps out, so ipiv_ is always initialized when used below).
  int* ipiv_;
  if (!RB_TYPE_P(ipiv, T_ARRAY)) {
    rb_raise(rb_eArgError, "ipiv must be of type Array");
  } else {
    ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
    for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
      ipiv_[index] = FIX2INT( RARRAY_AREF(ipiv, index) );
    }
  }

  if (!ttable[NM_DTYPE(a)]) {
    rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
  } else {
    // Call either our version of getrs or the LAPACK version.
    ttable[NM_DTYPE(a)](blas_order_sym(order), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv_, NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
  }

  // b is both returned and modified directly in the argument list.
  return b;
}

/*
 * Simple way to check from within Ruby code if clapack functions are available, without
 * having to wait around for an exception to be thrown.
 */
static VALUE nm_has_clapack(VALUE self) {
  return Qfalse;
}

/*
 * Call any of the clapack_xlaswp functions as directly as possible.
 *
 * Note that LAPACK's xlaswp functions accept a column-order matrix, but NMatrix uses row-order. Thus, n should be the
 * number of rows and lda should be the number of columns, no matter what it says in the documentation for dlaswp.f.
 */
static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx) {
  // All ten dtypes are supported (laswp only swaps rows; no arithmetic).
  // NOTE(review): the explicit template arguments on these table entries
  // appear to have been stripped during extraction; restore before compiling.
  static void (*ttable[nm::NUM_DTYPES])(const int n, void* a, const int lda, const int k1, const int k2, const int* ipiv, const int incx) = {
      nm::math::clapack_laswp, nm::math::clapack_laswp, nm::math::clapack_laswp, nm::math::clapack_laswp, nm::math::clapack_laswp,
      nm::math::clapack_laswp, nm::math::clapack_laswp, nm::math::clapack_laswp, nm::math::clapack_laswp, nm::math::clapack_laswp
  };

  // Allocate the C version of the pivot index array
  // (rb_raise longjmps out, so ipiv_ is always initialized when used below).
  int* ipiv_;
  if (!RB_TYPE_P(ipiv, T_ARRAY)) {
    rb_raise(rb_eArgError, "ipiv must be of type Array");
  } else {
    ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
    for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
      ipiv_[index] = FIX2INT( RARRAY_AREF(ipiv, index) );
    }
  }

  // Call either our version of laswp or the LAPACK version.
  ttable[NM_DTYPE(a)](FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), FIX2INT(k1), FIX2INT(k2), ipiv_, FIX2INT(incx));

  // a is both returned and modified directly in the argument list.
  return a;
}

/*
 * C accessor for calculating an exact determinant. Dense matrix version.
 */
void nm_math_det_exact_from_dense(const int M, const void* elements, const int lda, nm::dtype_t dtype, void* result) {
  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::det_exact_from_dense, void, const int M, const void* A_elements, const int lda, void* result_arg);

  ttable[dtype](M, elements, lda, result);
}

/*
 * C accessor for calculating an exact determinant. Yale matrix version.
 */
void nm_math_det_exact_from_yale(const int M, const YALE_STORAGE* storage, const int lda, nm::dtype_t dtype, void* result) {
  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::det_exact_from_yale, void, const int M, const YALE_STORAGE* storage, const int lda, void* result_arg);

  ttable[dtype](M, storage, lda, result);
}

/*
 * C accessor for solving a system of linear equations.
*/ void nm_math_solve(VALUE lu, VALUE b, VALUE x, VALUE ipiv) { int* pivot = new int[RARRAY_LEN(ipiv)]; for (int i = 0; i < RARRAY_LEN(ipiv); ++i) { pivot[i] = FIX2INT(rb_ary_entry(ipiv, i)); } NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::solve, void, const int, const void*, const void*, void*, const int*); ttable[NM_DTYPE(x)](NM_SHAPE0(b), NM_STORAGE_DENSE(lu)->elements, NM_STORAGE_DENSE(b)->elements, NM_STORAGE_DENSE(x)->elements, pivot); } /* * C accessor for reducing a matrix to hessenberg form. */ void nm_math_hessenberg(VALUE a) { static void (*ttable[nm::NUM_DTYPES])(const int, void*) = { NULL, NULL, NULL, NULL, NULL, // does not support ints nm::math::hessenberg, nm::math::hessenberg, NULL, NULL, // does not support Complex NULL // no support for Ruby Object }; ttable[NM_DTYPE(a)](NM_SHAPE0(a), NM_STORAGE_DENSE(a)->elements); } /* * C accessor for calculating an in-place inverse. */ void nm_math_inverse(const int M, void* a_elements, nm::dtype_t dtype) { NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::inverse, void, const int, void*); ttable[dtype](M, a_elements); } /* * C accessor for calculating an exact inverse. Dense matrix version. */ void nm_math_inverse_exact_from_dense(const int M, const void* A_elements, const int lda, void* B_elements, const int ldb, nm::dtype_t dtype) { NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::inverse_exact_from_dense, void, const int, const void*, const int, void*, const int); ttable[dtype](M, A_elements, lda, B_elements, ldb); } /* * C accessor for calculating an exact inverse. Yale matrix version. */ void nm_math_inverse_exact_from_yale(const int M, const YALE_STORAGE* storage, const int lda, YALE_STORAGE* inverse, const int ldb, nm::dtype_t dtype) { NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::inverse_exact_from_yale, void, const int, const YALE_STORAGE*, const int, YALE_STORAGE*, const int); ttable[dtype](M, storage, lda, inverse, ldb); } /* * Transpose an array of elements that represent a row-major dense matrix. 
Does not allocate anything, only does an memcpy. */ void nm_math_transpose_generic(const size_t M, const size_t N, const void* A, const int lda, void* B, const int ldb, size_t element_size) { for (size_t i = 0; i < N; ++i) { for (size_t j = 0; j < M; ++j) { memcpy(reinterpret_cast(B) + (i*ldb+j)*element_size, reinterpret_cast(A) + (j*lda+i)*element_size, element_size); } } } } // end of extern "C" block ================================================ FILE: ext/nmatrix/nm_memory.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == nm_memory.h // // Macros for memory allocation and freeing /** * We define these macros, which just call the ruby ones, as this makes * debugging memory issues (particularly those involving interaction with * the ruby GC) easier, as it's posssible to add debugging code temporarily. */ #ifndef __NM_MEMORY_H__ #define __NM_MEMORY_H__ #include #define NM_ALLOC(type) (ALLOC(type)) #define NM_ALLOC_N(type, n) (ALLOC_N(type, n)) #define NM_REALLOC_N(var, type, n) (REALLOC_N(var, type, n)) #define NM_ALLOCA_N(type, n) (ALLOCA_N(type, n)) #define NM_FREE(var) (xfree(var)) #define NM_ALLOC_NONRUBY(type) ((type*) malloc(sizeof(type))) //Defines whether to do conservative gc registrations, i.e. 
// i.e. those registrations that we're not that sure are necessary.
//#define NM_GC_CONSERVATIVE

#ifdef NM_GC_CONSERVATIVE
#define NM_CONSERVATIVE(statement) (statement)
#else
#define NM_CONSERVATIVE(statement)
#endif //NM_GC_CONSERVATIVE

#endif



================================================
FILE: ext/nmatrix/nmatrix.cpp
================================================
/////////////////////////////////////////////////////////////////////
// = NMatrix
//
// A linear algebra library for scientific computation in Ruby.
// NMatrix is part of SciRuby.
//
// NMatrix was originally inspired by and derived from NArray, by
// Masahiro Tanaka: http://narray.rubyforge.org
//
// == Copyright Information
//
// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
//
// Please see LICENSE.txt for additional copyright notices.
//
// == Contributing
//
// By contributing source code to SciRuby, you agree to be bound by
// our Contributor Agreement:
//
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
//
// == nmatrix.cpp
//
// Main C++ source file for NMatrix. Contains Init_nmatrix and most Ruby
// instance and class methods for NMatrix. Also responsible for calling Init
// methods on related modules.

/*
 * Standard Includes
 */
// NOTE(review): the header names inside angle brackets were lost during
// extraction (likely <ruby.h>, <cmath>, <algorithm> and <fstream>); restore
// before compiling.
#include
#include
#include // std::min
#include

/*
 * Project Includes
 */
#include "nmatrix_config.h"

#include "types.h"
#include "data/data.h"
#include "math/math.h"
#include "util/io.h"
#include "storage/storage.h"
#include "storage/list/list.h"
#include "storage/yale/yale.h"

#include "nmatrix.h"

#include "ruby_constants.h"

/*
 * Ruby internals
 */

/*
 * Macros
 */

/*
 * Global Variables
 */

namespace nm {

/*
 * This function is pulled out separately so it can be called for hermitian matrix writing, which also uses it.
 */
// NOTE(review): template parameter lists and cast target types were lost
// during extraction throughout this section (e.g. "template <typename DType>",
// "reinterpret_cast<DType*>"); restore before compiling.
template size_t write_padded_dense_elements_upper(std::ofstream& f, DENSE_STORAGE* storage, symm_t symm) {
  // Write upper triangular portion. Assume 2D square matrix.
  DType* elements = reinterpret_cast(storage->elements);
  size_t length = storage->shape[0];

  size_t bytes_written = 0;

  for (size_t i = 0; i < length; ++i) { // which row are we on?
    f.write( reinterpret_cast( &(elements[ i*(length + 1) ]) ),
             (length - i) * sizeof(DType) );
    bytes_written += (length - i) * sizeof(DType);
  }
  return bytes_written;
}

/*
 * We need to specialize for Hermitian matrices. The next six functions accomplish that specialization, basically
 * by ensuring that non-complex matrices cannot read or write hermitians (which would cause big problems).
 */
// Primary template: only complex dtypes may be written as hermitian.
template size_t write_padded_dense_elements_herm(std::ofstream& f, DENSE_STORAGE* storage, symm_t symm) {
  rb_raise(rb_eArgError, "cannot write a non-complex matrix as hermitian");
}

// Specialization for Complex64 (explicit <Complex64> argument lost in extraction).
template <> size_t write_padded_dense_elements_herm(std::ofstream& f, DENSE_STORAGE* storage, symm_t symm) {
  return write_padded_dense_elements_upper(f, storage, symm);
}

// Specialization for Complex128 (explicit <Complex128> argument lost in extraction).
template <> size_t write_padded_dense_elements_herm(std::ofstream& f, DENSE_STORAGE* storage, symm_t symm) {
  return write_padded_dense_elements_upper(f, storage, symm);
}

// Primary template: only complex dtypes may be read as hermitian.
template void read_padded_dense_elements_herm(DType* elements, size_t length) {
  rb_raise(rb_eArgError, "cannot read a non-complex matrix as hermitian");
}

// Reconstruct the lower triangle as the conjugate of the upper triangle.
template <> void read_padded_dense_elements_herm(Complex64* elements, size_t length) {
  for (size_t i = 0; i < length; ++i) {
    for (size_t j = i+1; j < length; ++j) {
      elements[j * length + i] = elements[i * length + j].conjugate();
    }
  }
}

template <> void read_padded_dense_elements_herm(Complex128* elements, size_t length) {
  for (size_t i = 0; i < length; ++i) {
    for (size_t j = i+1; j < length; ++j) {
      elements[j * length + i] = elements[i * length + j].conjugate();
    }
  }
}

/*
 * Read the elements of a dense storage matrix from a binary file, padded to 64-bits.
 *
 * storage should already be allocated. No initialization necessary.
 */
// NOTE(review): template parameter lists and cast target types were lost
// during extraction in this section; restore before compiling.
template void read_padded_dense_elements(std::ifstream& f, DENSE_STORAGE* storage, nm::symm_t symm) {
  size_t bytes_read = 0;

  if (symm == nm::NONSYMM) {
    // Easy. Simply read the whole elements array.
    size_t length = nm_storage_count_max_elements(reinterpret_cast(storage));
    f.read(reinterpret_cast(storage->elements), length * sizeof(DType) );

    bytes_read += length * sizeof(DType);
  } else if (symm == LOWER) {
    // Read lower triangular portion and initialize remainder to 0
    DType* elements = reinterpret_cast(storage->elements);
    size_t length = storage->shape[0];

    for (size_t i = 0; i < length; ++i) { // which row?
      f.read( reinterpret_cast(&(elements[i * length])), (i + 1) * sizeof(DType) );

      // need to zero-fill the rest of the row.
      for (size_t j = i+1; j < length; ++j)
        elements[i * length + j] = 0;

      bytes_read += (i + 1) * sizeof(DType);
    }
  } else {
    // All other symmetries store only the upper triangle on disk.
    DType* elements = reinterpret_cast(storage->elements);
    size_t length = storage->shape[0];

    for (size_t i = 0; i < length; ++i) { // which row?
      f.read( reinterpret_cast(&(elements[i * (length + 1)])), (length - i) * sizeof(DType) );

      bytes_read += (length - i) * sizeof(DType);
    }

    // Reconstruct the lower triangle from the upper one according to symm.
    if (symm == SYMM) {
      for (size_t i = 0; i < length; ++i) {
        for (size_t j = i+1; j < length; ++j) {
          elements[j * length + i] = elements[i * length + j];
        }
      }
    } else if (symm == SKEW) {
      for (size_t i = 0; i < length; ++i) {
        for (size_t j = i+1; j < length; ++j) {
          elements[j * length + i] = -elements[i * length + j];
        }
      }
    } else if (symm == HERM) {
      read_padded_dense_elements_herm(elements, length);

    } else if (symm == UPPER) { // zero-fill the rest of the rows
      for (size_t i = 0; i < length; ++i) {
        for(size_t j = i+1; j < length; ++j) {
          elements[j * length + i] = 0;
        }
      }
    }
  }

  // Ignore any padding.
  // NOTE(review): this skips (bytes_read % 8) bytes, which mirrors the
  // (bytes_written % 8) padding written below — self-consistent, but it does
  // not actually round up to an 8-byte boundary for all element sizes;
  // confirm against binary_format.txt before changing.
  if (bytes_read % 8) f.ignore(bytes_read % 8);
}

template void write_padded_yale_elements(std::ofstream& f, YALE_STORAGE* storage, size_t length, nm::symm_t symm) {
  if (symm != nm::NONSYMM) rb_raise(rb_eNotImpError, "Yale matrices can only be read/written in full form");

  // Keep track of bytes written for each of A and IJA so we know how much padding to use.
  size_t bytes_written = length * sizeof(DType);

  // Write A array
  f.write(reinterpret_cast(storage->a), bytes_written);

  // Padding
  int64_t zero = 0;
  f.write(reinterpret_cast(&zero), bytes_written % 8);

  bytes_written = length * sizeof(IType);
  f.write(reinterpret_cast(storage->ija), bytes_written);

  // More padding
  f.write(reinterpret_cast(&zero), bytes_written % 8);
}

template void read_padded_yale_elements(std::ifstream& f, YALE_STORAGE* storage, size_t length, nm::symm_t symm) {
  if (symm != NONSYMM) rb_raise(rb_eNotImpError, "Yale matrices can only be read/written in full form");

  // Read the A array, then skip the padding that was written after it.
  size_t bytes_read = length * sizeof(DType);
  f.read(reinterpret_cast(storage->a), bytes_read);

  int64_t padding = 0;
  f.read(reinterpret_cast(&padding), bytes_read % 8);

  // Read the IJA array and its padding.
  bytes_read = length * sizeof(IType);
  f.read(reinterpret_cast(storage->ija), bytes_read);

  f.read(reinterpret_cast(&padding), bytes_read % 8);
}

/*
 * Write the elements of a dense storage matrix to a binary file, padded to 64-bits.
 */
template void write_padded_dense_elements(std::ofstream& f, DENSE_STORAGE* storage, nm::symm_t symm) {
  size_t bytes_written = 0;

  if (symm == nm::NONSYMM) {
    // Simply write the whole elements array.
    size_t length = nm_storage_count_max_elements(storage);
    f.write(reinterpret_cast(storage->elements), length * sizeof(DType));
    bytes_written += length * sizeof(DType);

  } else if (symm == nm::LOWER) {
    // Write lower triangular portion. Assume 2D square matrix.
    DType* elements = reinterpret_cast(storage->elements);
    size_t length = storage->shape[0];
    for (size_t i = 0; i < length; ++i) { // which row?
      f.write( reinterpret_cast( &(elements[i * length]) ),
               (i + 1) * sizeof(DType) );
      bytes_written += (i + 1) * sizeof(DType);
    }
  } else if (symm == nm::HERM) {
    // Hermitian dispatch raises for non-complex dtypes.
    bytes_written += write_padded_dense_elements_herm(f, storage, symm);

  } else { // HERM, UPPER, SYMM, SKEW
    bytes_written += write_padded_dense_elements_upper(f, storage, symm);
  }

  // Padding
  int64_t zero = 0;
  f.write(reinterpret_cast(&zero), bytes_written % 8);
}

} // end of namespace nm

extern "C" {

#include "ruby_nmatrix.c"

} // end of extern "C"



================================================
FILE: ext/nmatrix/nmatrix.h
================================================
/////////////////////////////////////////////////////////////////////
// = NMatrix
//
// A linear algebra library for scientific computation in Ruby.
// NMatrix is part of SciRuby.
//
// NMatrix was originally inspired by and derived from NArray, by
// Masahiro Tanaka: http://narray.rubyforge.org
//
// == Copyright Information
//
// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
//
// Please see LICENSE.txt for additional copyright notices.
//
// == Contributing
//
// By contributing source code to SciRuby, you agree to be bound by
// our Contributor Agreement:
//
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
//
// == nmatrix.h
//
// C and C++ API for NMatrix, and main header file.
#ifndef NMATRIX_H #define NMATRIX_H /* * Standard Includes */ #include #include "ruby_constants.h" #ifdef __cplusplus #include #include #else #include #include #endif #ifdef BENCHMARK // SOURCE: http://stackoverflow.com/questions/2349776/how-can-i-benchmark-a-c-program-easily #ifdef __cplusplus #include #include #else #include #include #endif #endif #ifdef __cplusplus #include "nm_memory.h" #endif #ifndef RB_BUILTIN_TYPE # define RB_BUILTIN_TYPE(obj) BUILTIN_TYPE(obj) #endif #ifndef RB_FLOAT_TYPE_P /* NOTE: assume flonum doesn't exist */ # define RB_FLOAT_TYPE_P(obj) ( \ (!SPECIAL_CONST_P(obj) && BUILTIN_TYPE(obj) == T_FLOAT)) #endif #ifndef RB_TYPE_P # define RB_TYPE_P(obj, type) ( \ ((type) == T_FIXNUM) ? FIXNUM_P(obj) : \ ((type) == T_TRUE) ? ((obj) == Qtrue) : \ ((type) == T_FALSE) ? ((obj) == Qfalse) : \ ((type) == T_NIL) ? ((obj) == Qnil) : \ ((type) == T_UNDEF) ? ((obj) == Qundef) : \ ((type) == T_SYMBOL) ? SYMBOL_P(obj) : \ ((type) == T_FLOAT) ? RB_FLOAT_TYPE_P(obj) : \ (!SPECIAL_CONST_P(obj) && BUILTIN_TYPE(obj) == (type))) #endif #ifndef FIX_CONST_VALUE_PTR # if defined(__fcc__) || defined(__fcc_version) || \ defined(__FCC__) || defined(__FCC_VERSION) /* workaround for old version of Fujitsu C Compiler (fcc) */ # define FIX_CONST_VALUE_PTR(x) ((const VALUE *)(x)) # else # define FIX_CONST_VALUE_PTR(x) (x) # endif #endif #ifndef HAVE_RB_ARRAY_CONST_PTR static inline const VALUE * rb_array_const_ptr(VALUE a) { return FIX_CONST_VALUE_PTR((RBASIC(a)->flags & RARRAY_EMBED_FLAG) ? 
RARRAY(a)->as.ary : RARRAY(a)->as.heap.ptr); } #endif #ifndef RARRAY_CONST_PTR # define RARRAY_CONST_PTR(a) rb_array_const_ptr(a) #endif #ifndef RARRAY_AREF # define RARRAY_AREF(a, i) (RARRAY_CONST_PTR(a)[i]) #endif /* * Macros */ #define RUBY_ZERO INT2FIX(0) #ifndef SIZEOF_INT #error SIZEOF_INT undefined #else #if SIZEOF_INT == 8 #define DEFAULT_DTYPE INT64 #define SIZE_T INT64 #else #if SIZEOF_INT == 4 #define DEFAULT_DTYPE INT32 #define SIZE_T INT32 #else #if SIZEOF_INT == 2 #define DEFAULT_DTYPE INT16 #define SIZE_T INT16 #else #error Unhandled SIZEOF_INT -- please #define SIZE_T and DEFAULT_DTYPE manually. #endif #endif #endif #endif /* * == Macros for Concurrent C and C++ Header Maintenance * * These macros look complicated, but they're really not so bad. They're also important: they ensure that whether our * header file (nmatrix.h) is read by a C++ or a C compiler, all the same data structures and enumerators exist, albeit * with slightly different names. * * "But wait," you say, "You use structs. Structs exist in C and C++. Why use a macro to set them up?" * * Well, in C, you have to be explicit about what a struct is. You can actually get around that requirement by using a * typedef: * * typedef struct STORAGE { ... } STORAGE; * * Also, we use C++ inheritance, which is obviously not allowed in C. So we have to ensure that the base class's members * are exposed properly to our child classes. * * The macros also allow us to put all of our C++ types into namespaces. For C, we prefix everything with either nm_ or * NM_ to distinguish our declarations from those in other libraries. */ #ifdef __cplusplus /* These are the C++ versions of the macros. */ /* * If no block is given, return an enumerator. This copied straight out of ruby's include/ruby/intern.h. * * rb_enumeratorize is located in enumerator.c. 
* * VALUE rb_enumeratorize(VALUE obj, VALUE meth, int argc, VALUE *argv) { * return enumerator_init(enumerator_allocate(rb_cEnumerator), obj, meth, argc, argv); * } */ //opening portion -- this allows unregistering any objects in use before returning #define RETURN_SIZED_ENUMERATOR_PRE do { \ if (!rb_block_given_p()) { //remaining portion #ifdef RUBY_2 #ifndef RETURN_SIZED_ENUMERATOR #undef RETURN_SIZED_ENUMERATOR // Ruby 2.0 and higher has rb_enumeratorize_with_size instead of rb_enumeratorize. // We want to support both in the simplest way possible. #define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn) \ return rb_enumeratorize_with_size((obj), ID2SYM(rb_frame_this_func()), (argc), (argv), (size_fn)); \ } \ } while (0) #endif #else #undef RETURN_SIZED_ENUMERATOR #define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn) \ return rb_enumeratorize((obj), ID2SYM(rb_frame_this_func()), (argc), (argv)); \ } \ } while (0) #endif #define NM_DECL_ENUM(enum_type, name) nm::enum_type name #define NM_DECL_STRUCT(type, name) type name; #define NM_DEF_STORAGE_ELEMENTS \ NM_DECL_ENUM(dtype_t, dtype); \ size_t dim; \ size_t* shape; \ size_t* offset; \ int count; \ STORAGE* src; #define NM_DEF_STORAGE_CHILD_STRUCT_PRE(name) struct name : STORAGE { #define NM_DEF_STORAGE_STRUCT_POST(name) }; #define NM_DEF_STORAGE_STRUCT \ struct STORAGE { \ NM_DEF_STORAGE_ELEMENTS; \ }; #define NM_DEF_STRUCT_PRE(name) struct name { #define NM_DEF_STRUCT_POST(name) }; #define NM_DEF_ENUM(name, ...) \ namespace nm { \ enum name { \ __VA_ARGS__ \ }; \ } // end of namespace nm #else /* These are the C versions of the macros. 
*/ #define NM_DECL_ENUM(enum_type, name) nm_ ## enum_type name #define NM_DECL_STRUCT(type, name) struct NM_ ## type name; #define NM_DEF_STORAGE_ELEMENTS \ NM_DECL_ENUM(dtype_t, dtype); \ size_t dim; \ size_t* shape; \ size_t* offset; \ int count; \ NM_DECL_STRUCT(STORAGE*, src); #define NM_DEF_STORAGE_CHILD_STRUCT_PRE(name) typedef struct NM_ ## name { \ NM_DEF_STORAGE_ELEMENTS; #define NM_DEF_STORAGE_STRUCT_POST(name) } NM_ ## name; #define NM_DEF_STORAGE_STRUCT \ typedef struct NM_STORAGE { \ NM_DEF_STORAGE_ELEMENTS; \ } NM_STORAGE; #define NM_DEF_STRUCT_PRE(name) typedef struct NM_ ## name { #define NM_DEF_STRUCT_POST(name) } NM_ ## name; #define NM_DEF_ENUM(name, ...) \ typedef enum nm_ ## name { \ __VA_ARGS__ \ } nm_ ## name; #endif /* End of C/C++ Parallel Header Macro Definitions */ /* * Types */ #define NM_NUM_DTYPES 10 // data/data.h #define NM_NUM_STYPES 3 // storage/storage.h //#ifdef __cplusplus //namespace nm { //#endif /* Storage Type -- Dense or Sparse */ NM_DEF_ENUM(stype_t, DENSE_STORE = 0, LIST_STORE = 1, YALE_STORE = 2); /* Data Type */ NM_DEF_ENUM(dtype_t, BYTE = 0, // unsigned char INT8 = 1, // char INT16 = 2, // short INT32 = 3, // int INT64 = 4, // long FLOAT32 = 5, // float FLOAT64 = 6, // double COMPLEX64 = 7, // Complex64 class COMPLEX128 = 8, // Complex128 class RUBYOBJ = 9); // Ruby VALUE type NM_DEF_ENUM(symm_t, NONSYMM = 0, SYMM = 1, SKEW = 2, HERM = 3, UPPER = 4, LOWER = 5); //#ifdef __cplusplus //}; // end of namespace nm //#endif /* struct STORAGE */ NM_DEF_STORAGE_STRUCT; /* Dense Storage */ NM_DEF_STORAGE_CHILD_STRUCT_PRE(DENSE_STORAGE); // struct DENSE_STORAGE : STORAGE { void* elements; // should go first to align with void* a in yale and NODE* first in list. size_t* stride; NM_DEF_STORAGE_STRUCT_POST(DENSE_STORAGE); // }; /* Yale Storage */ NM_DEF_STORAGE_CHILD_STRUCT_PRE(YALE_STORAGE); void* a; // should go first size_t ndnz; // Strictly non-diagonal non-zero count! 
size_t capacity; size_t* ija; NM_DEF_STORAGE_STRUCT_POST(YALE_STORAGE); // FIXME: NODE and LIST should be put in some kind of namespace or something, at least in C++. NM_DEF_STRUCT_PRE(NODE); // struct NODE { size_t key; void* val; NM_DECL_STRUCT(NODE*, next); // NODE* next; NM_DEF_STRUCT_POST(NODE); // }; NM_DEF_STRUCT_PRE(LIST); // struct LIST { NM_DECL_STRUCT(NODE*, first); // NODE* first; NM_DEF_STRUCT_POST(LIST); // }; /* List-of-Lists Storage */ NM_DEF_STORAGE_CHILD_STRUCT_PRE(LIST_STORAGE); // struct LIST_STORAGE : STORAGE { // List storage specific elements. void* default_val; NM_DECL_STRUCT(LIST*, rows); // LIST* rows; NM_DEF_STORAGE_STRUCT_POST(LIST_STORAGE); // }; /* NMATRIX Object */ NM_DEF_STRUCT_PRE(NMATRIX); // struct NMATRIX { NM_DECL_ENUM(stype_t, stype); // stype_t stype; // Method of storage (csc, dense, etc). NM_DECL_STRUCT(STORAGE*, storage); // STORAGE* storage; // Pointer to storage struct. NM_DEF_STRUCT_POST(NMATRIX); // }; /* Structs for dealing with VALUEs in use so that they don't get GC'd */ NM_DEF_STRUCT_PRE(NM_GC_LL_NODE); // struct NM_GC_LL_NODE { VALUE* val; // VALUE* val; size_t n; // size_t n; NM_DECL_STRUCT(NM_GC_LL_NODE*, next); // NM_GC_LL_NODE* next; NM_DEF_STRUCT_POST(NM_GC_LL_NODE); // }; NM_DEF_STRUCT_PRE(NM_GC_HOLDER); // struct NM_GC_HOLDER { NM_DECL_STRUCT(NM_GC_LL_NODE*, start); // NM_GC_LL_NODE* start; NM_DEF_STRUCT_POST(NM_GC_HOLDER); // }; #define NM_MAX_RANK 15 #define UnwrapNMatrix(obj,var) Data_Get_Struct(obj, NMATRIX, var) #define NM_STORAGE(val) (NM_STRUCT(val)->storage) #ifdef __cplusplus #define NM_STRUCT(val) ((NMATRIX*)(DATA_PTR(val))) #define NM_STORAGE_LIST(val) ((LIST_STORAGE*)(NM_STORAGE(val))) #define NM_STORAGE_YALE(val) ((YALE_STORAGE*)(NM_STORAGE(val))) #define NM_STORAGE_DENSE(val) ((DENSE_STORAGE*)(NM_STORAGE(val))) #else #define NM_STRUCT(val) ((struct NM_NMATRIX*)(DATA_PTR(val))) #define NM_STORAGE_LIST(val) ((struct NM_LIST_STORAGE*)(NM_STORAGE(val))) #define NM_STORAGE_YALE(val) ((struct 
NM_YALE_STORAGE*)(NM_STORAGE(val))) #define NM_STORAGE_DENSE(val) ((struct NM_DENSE_STORAGE*)(NM_STORAGE(val))) #endif #define NM_SRC(val) (NM_STORAGE(val)->src) #define NM_DIM(val) (NM_STORAGE(val)->dim) // Returns an int corresponding the data type of the nmatrix. See the dtype_t // enum for a list of possible data types. #define NM_DTYPE(val) (NM_STORAGE(val)->dtype) // Returns a number corresponding the storage type of the nmatrix. See the stype_t // enum for a list of possible storage types. #define NM_STYPE(val) (NM_STRUCT(val)->stype) // Get the shape of the ith dimension (int) #define NM_SHAPE(val,i) (NM_STORAGE(val)->shape[(i)]) // Get the shape of the 0th dimension (int) #define NM_SHAPE0(val) (NM_STORAGE(val)->shape[0]) // Get the shape of the 1st dimenension (int) #define NM_SHAPE1(val) (NM_STORAGE(val)->shape[1]) // Get the default value assigned to the nmatrix. #define NM_DEFAULT_VAL(val) (NM_STORAGE_LIST(val)->default_val) // Number of elements in a dense nmatrix. #define NM_DENSE_COUNT(val) (nm_storage_count_max_elements(NM_STORAGE_DENSE(val))) // Get a pointer to the array that stores elements in a dense matrix. 
#define NM_DENSE_ELEMENTS(val) (NM_STORAGE_DENSE(val)->elements) #define NM_SIZEOF_DTYPE(val) (DTYPE_SIZES[NM_DTYPE(val)]) #define NM_REF(val,slice) (RefFuncs[NM_STYPE(val)]( NM_STORAGE(val), slice, NM_SIZEOF_DTYPE(val) )) #define NM_MAX(a,b) (((a)>(b))?(a):(b)) #define NM_MIN(a,b) (((a)>(b))?(b):(a)) #define NM_SWAP(a,b,tmp) {(tmp)=(a);(a)=(b);(b)=(tmp);} #define NM_CHECK_ALLOC(x) if (!x) rb_raise(rb_eNoMemError, "insufficient memory"); #define RB_FILE_EXISTS(fn) (rb_funcall(rb_const_get(rb_cObject, rb_intern("File")), rb_intern("exists?"), 1, (fn)) == Qtrue) #define IsNMatrixType(v) (RB_TYPE_P(v, T_DATA) && (RDATA(v)->dfree == (RUBY_DATA_FUNC)nm_delete || RDATA(v)->dfree == (RUBY_DATA_FUNC)nm_delete_ref)) #define CheckNMatrixType(v) if (!IsNMatrixType(v)) rb_raise(rb_eTypeError, "expected NMatrix on left-hand side of operation"); #define NM_IsNMatrix(obj) \ (rb_obj_is_kind_of(obj, cNMatrix) == Qtrue) #define NM_IsNVector(obj) \ (rb_obj_is_kind_of(obj, cNVector) == Qtrue) #define RB_P(OBJ) \ rb_funcall(rb_stderr, rb_intern("print"), 1, rb_funcall(OBJ, rb_intern("object_id"), 0)); \ rb_funcall(rb_stderr, rb_intern("puts"), 1, rb_funcall(OBJ, rb_intern("inspect"), 0)); #ifdef __cplusplus typedef VALUE (*METHOD)(...); //}; // end of namespace nm #endif // In the init code below, we need to use NMATRIX for c++ and NM_NMATRIX for c // this macro chooses the correct one: #ifdef __cplusplus #define _NMATRIX NMATRIX #define _STORAGE STORAGE #else #define _NMATRIX NM_NMATRIX #define _STORAGE NM_STORAGE #endif /* * Functions */ #ifdef __cplusplus extern "C" { #endif void Init_nmatrix(); // External API VALUE rb_nmatrix_dense_create(NM_DECL_ENUM(dtype_t, dtype), size_t* shape, size_t dim, void* elements, size_t length); VALUE rb_nvector_dense_create(NM_DECL_ENUM(dtype_t, dtype), void* elements, size_t length); NM_DECL_ENUM(dtype_t, nm_dtype_guess(VALUE)); // (This is a function) NM_DECL_ENUM(dtype_t, nm_dtype_min(VALUE)); // Non-API functions needed by other cpp files. 
_NMATRIX* nm_create(NM_DECL_ENUM(stype_t, stype), _STORAGE* storage); _NMATRIX* nm_cast_with_ctype_args(_NMATRIX* self, NM_DECL_ENUM(stype_t, new_stype), NM_DECL_ENUM(dtype_t, new_dtype), void* init_ptr); VALUE nm_cast(VALUE self, VALUE new_stype_symbol, VALUE new_dtype_symbol, VALUE init); void nm_mark(_NMATRIX* mat); void nm_delete(_NMATRIX* mat); void nm_delete_ref(_NMATRIX* mat); void nm_register_values(VALUE* vals, size_t n); void nm_register_value(VALUE* val); void nm_unregister_value(VALUE* val); void nm_unregister_values(VALUE* vals, size_t n); void nm_register_storage(NM_DECL_ENUM(stype_t, stype), const _STORAGE* storage); void nm_unregister_storage(NM_DECL_ENUM(stype_t, stype), const _STORAGE* storage); void nm_register_nmatrix(_NMATRIX* nmatrix); void nm_unregister_nmatrix(_NMATRIX* nmatrix); void nm_completely_unregister_value(VALUE* val); #ifdef __cplusplus } #endif #undef _NMATRIX #undef _STORAGE #endif // NMATRIX_H ================================================ FILE: ext/nmatrix/ruby_constants.cpp ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == ruby_symbols.cpp // // Ruby symbols used throught the NMatrix project. 
/* * Standard Includes */ #include /* * Global Variables */ ID nm_rb_dtype, nm_rb_stype, nm_rb_capacity, nm_rb_default, nm_rb_real, nm_rb_imag, nm_rb_numer, nm_rb_denom, nm_rb_complex_conjugate, nm_rb_transpose, nm_rb_no_transpose, nm_rb_left, nm_rb_right, nm_rb_upper, nm_rb_lower, nm_rb_unit, nm_rb_nonunit, nm_rb_dense, nm_rb_list, nm_rb_yale, nm_rb_row, nm_rb_column, nm_rb_add, nm_rb_sub, nm_rb_mul, nm_rb_div, nm_rb_both, nm_rb_none, nm_rb_negate, nm_rb_percent, nm_rb_gt, nm_rb_lt, nm_rb_eql, nm_rb_neql, nm_rb_gte, nm_rb_lte, nm_rb_hash; VALUE cNMatrix, cNMatrix_IO, cNMatrix_IO_Matlab, cNMatrix_YaleFunctions, cNMatrix_GC_holder, nm_eDataTypeError, nm_eConvergenceError, nm_eStorageTypeError, nm_eShapeError, nm_eNotInvertibleError; /* * Functions */ void nm_init_ruby_constants(void) { nm_rb_dtype = rb_intern("dtype"); nm_rb_stype = rb_intern("stype"); nm_rb_capacity = rb_intern("capacity"); nm_rb_default = rb_intern("default"); nm_rb_real = rb_intern("real"); nm_rb_imag = rb_intern("imag"); nm_rb_numer = rb_intern("numerator"); nm_rb_denom = rb_intern("denominator"); nm_rb_complex_conjugate = rb_intern("complex_conjugate"); nm_rb_transpose = rb_intern("transpose"); nm_rb_no_transpose = rb_intern("no_transpose"); nm_rb_dense = rb_intern("dense"); nm_rb_list = rb_intern("list"); nm_rb_yale = rb_intern("yale"); nm_rb_add = rb_intern("+"); nm_rb_sub = rb_intern("-"); nm_rb_mul = rb_intern("*"); nm_rb_div = rb_intern("/"); nm_rb_negate = rb_intern("-@"); nm_rb_percent = rb_intern("%"); nm_rb_gt = rb_intern(">"); nm_rb_lt = rb_intern("<"); nm_rb_eql = rb_intern("=="); nm_rb_neql = rb_intern("!="); nm_rb_gte = rb_intern(">="); nm_rb_lte = rb_intern("<="); nm_rb_left = rb_intern("left"); nm_rb_right = rb_intern("right"); nm_rb_upper = rb_intern("upper"); nm_rb_lower = rb_intern("lower"); nm_rb_unit = rb_intern("unit"); nm_rb_nonunit = rb_intern("nonunit"); nm_rb_hash = rb_intern("hash"); nm_rb_column = rb_intern("column"); nm_rb_row = rb_intern("row"); nm_rb_both = 
rb_intern("both"); nm_rb_none = rb_intern("none"); } ================================================ FILE: ext/nmatrix/ruby_constants.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == data.h // // Header file for dealing with data types. #ifndef RUBY_CONSTANTS_H #define RUBY_CONSTANTS_H /* * Standard Includes */ #include /* * Data */ extern ID nm_rb_dtype, nm_rb_stype, nm_rb_capacity, nm_rb_default, nm_rb_real, nm_rb_imag, nm_rb_numer, nm_rb_denom, nm_rb_complex_conjugate, nm_rb_transpose, nm_rb_no_transpose, nm_rb_left, nm_rb_right, nm_rb_upper, nm_rb_lower, nm_rb_unit, nm_rb_nonunit, nm_rb_dense, nm_rb_list, nm_rb_yale, nm_rb_row, nm_rb_column, nm_rb_add, nm_rb_sub, nm_rb_mul, nm_rb_div, nm_rb_negate, nm_rb_percent, nm_rb_gt, nm_rb_lt, nm_rb_eql, nm_rb_neql, nm_rb_gte, nm_rb_lte, nm_rb_hash; extern VALUE cNMatrix, cNMatrix_IO, cNMatrix_IO_Matlab, cNMatrix_YaleFunctions, cNMatrix_GC_holder, nm_eDataTypeError, nm_eConvergenceError, nm_eStorageTypeError, nm_eShapeError, nm_eNotInvertibleError; /* * Functions */ void nm_init_ruby_constants(void); #endif // RUBY_CONSTANTS_H ================================================ FILE: ext/nmatrix/ruby_nmatrix.c ================================================ 
///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == ruby_nmatrix.c // // Ruby-facing NMatrix C functions. Not compiled directly -- included // into nmatrix.cpp. // /* * Forward Declarations */ static VALUE nm_init(int argc, VALUE* argv, VALUE nm); static VALUE nm_init_copy(VALUE copy, VALUE original); static VALUE nm_init_transposed(VALUE self); static VALUE nm_read(int argc, VALUE* argv, VALUE self); static VALUE nm_write(int argc, VALUE* argv, VALUE self); static VALUE nm_init_yale_from_old_yale(VALUE shape, VALUE dtype, VALUE ia, VALUE ja, VALUE a, VALUE from_dtype, VALUE nm); static VALUE nm_alloc(VALUE klass); static VALUE nm_dtype(VALUE self); static VALUE nm_stype(VALUE self); static VALUE nm_default_value(VALUE self); static size_t effective_dim(STORAGE* s); static VALUE nm_effective_dim(VALUE self); static VALUE nm_dim(VALUE self); static VALUE nm_offset(VALUE self); static VALUE nm_shape(VALUE self); static VALUE nm_supershape(VALUE self); static VALUE nm_capacity(VALUE self); static VALUE nm_each_with_indices(VALUE nmatrix); static VALUE nm_each_stored_with_indices(VALUE nmatrix); static VALUE nm_each_ordered_stored_with_indices(VALUE nmatrix); static VALUE nm_map_stored(VALUE nmatrix); static void init_slice_no_alloc(SLICE* slice, size_t dim, int argc, VALUE* arg, size_t* 
shape); static VALUE nm_xslice(int argc, VALUE* argv, void* (*slice_func)(const STORAGE*, SLICE*), void (*delete_func)(NMATRIX*), VALUE self); static VALUE nm_mset(int argc, VALUE* argv, VALUE self); static VALUE nm_mget(int argc, VALUE* argv, VALUE self); static VALUE nm_mref(int argc, VALUE* argv, VALUE self); static VALUE nm_is_ref(VALUE self); static VALUE is_symmetric(VALUE self, bool hermitian); static VALUE nm_guess_dtype(VALUE self, VALUE v); static VALUE nm_min_dtype(VALUE self, VALUE v); static VALUE nm_data_pointer(VALUE self); /* * Macro defines an element-wise accessor function for some operation. * * This is only responsible for the Ruby accessor! You still have to write the actual functions, obviously. */ #define DEF_ELEMENTWISE_RUBY_ACCESSOR(oper, name) \ static VALUE nm_ew_##name(VALUE left_val, VALUE right_val) { \ return elementwise_op(nm::EW_##oper, left_val, right_val); \ } #define DEF_UNARY_RUBY_ACCESSOR(oper, name) \ static VALUE nm_unary_##name(VALUE self) { \ return unary_op(nm::UNARY_##oper, self); \ } #define DEF_NONCOM_ELEMENTWISE_RUBY_ACCESSOR(oper, name) \ static VALUE nm_noncom_ew_##name(int argc, VALUE* argv, VALUE self) { \ if (argc > 1) { \ return noncom_elementwise_op(nm::NONCOM_EW_##oper, self, argv[0], argv[1]); \ } else { \ return noncom_elementwise_op(nm::NONCOM_EW_##oper, self, argv[0], Qfalse); \ } \ } /* * Macro declares a corresponding accessor function prototype for some element-wise operation. 
*/ #define DECL_ELEMENTWISE_RUBY_ACCESSOR(name) static VALUE nm_ew_##name(VALUE left_val, VALUE right_val); #define DECL_UNARY_RUBY_ACCESSOR(name) static VALUE nm_unary_##name(VALUE self); #define DECL_NONCOM_ELEMENTWISE_RUBY_ACCESSOR(name) static VALUE nm_noncom_ew_##name(int argc, VALUE* argv, VALUE self); DECL_ELEMENTWISE_RUBY_ACCESSOR(add) DECL_ELEMENTWISE_RUBY_ACCESSOR(subtract) DECL_ELEMENTWISE_RUBY_ACCESSOR(multiply) DECL_ELEMENTWISE_RUBY_ACCESSOR(divide) DECL_ELEMENTWISE_RUBY_ACCESSOR(power) DECL_ELEMENTWISE_RUBY_ACCESSOR(mod) DECL_ELEMENTWISE_RUBY_ACCESSOR(eqeq) DECL_ELEMENTWISE_RUBY_ACCESSOR(neq) DECL_ELEMENTWISE_RUBY_ACCESSOR(lt) DECL_ELEMENTWISE_RUBY_ACCESSOR(gt) DECL_ELEMENTWISE_RUBY_ACCESSOR(leq) DECL_ELEMENTWISE_RUBY_ACCESSOR(geq) DECL_UNARY_RUBY_ACCESSOR(sin) DECL_UNARY_RUBY_ACCESSOR(cos) DECL_UNARY_RUBY_ACCESSOR(tan) DECL_UNARY_RUBY_ACCESSOR(asin) DECL_UNARY_RUBY_ACCESSOR(acos) DECL_UNARY_RUBY_ACCESSOR(atan) DECL_UNARY_RUBY_ACCESSOR(sinh) DECL_UNARY_RUBY_ACCESSOR(cosh) DECL_UNARY_RUBY_ACCESSOR(tanh) DECL_UNARY_RUBY_ACCESSOR(asinh) DECL_UNARY_RUBY_ACCESSOR(acosh) DECL_UNARY_RUBY_ACCESSOR(atanh) DECL_UNARY_RUBY_ACCESSOR(exp) DECL_UNARY_RUBY_ACCESSOR(log2) DECL_UNARY_RUBY_ACCESSOR(log10) DECL_UNARY_RUBY_ACCESSOR(sqrt) DECL_UNARY_RUBY_ACCESSOR(erf) DECL_UNARY_RUBY_ACCESSOR(erfc) DECL_UNARY_RUBY_ACCESSOR(cbrt) DECL_UNARY_RUBY_ACCESSOR(gamma) DECL_UNARY_RUBY_ACCESSOR(negate) DECL_UNARY_RUBY_ACCESSOR(floor) DECL_UNARY_RUBY_ACCESSOR(ceil) DECL_NONCOM_ELEMENTWISE_RUBY_ACCESSOR(atan2) DECL_NONCOM_ELEMENTWISE_RUBY_ACCESSOR(ldexp) DECL_NONCOM_ELEMENTWISE_RUBY_ACCESSOR(hypot) //log/round can be unary, but also take a base argument, as with Math.log static VALUE nm_unary_log(int argc, VALUE* argv, VALUE self); static VALUE nm_unary_round(int argc, VALUE* argv, VALUE self); static VALUE elementwise_op(nm::ewop_t op, VALUE left_val, VALUE right_val); static VALUE unary_op(nm::unaryop_t op, VALUE self); static VALUE noncom_elementwise_op(nm::noncom_ewop_t op, VALUE 
self, VALUE other, VALUE orderflip); static VALUE nm_symmetric(VALUE self); static VALUE nm_hermitian(VALUE self); static VALUE nm_eqeq(VALUE left, VALUE right); static VALUE matrix_multiply_scalar(NMATRIX* left, VALUE scalar); static VALUE matrix_multiply(NMATRIX* left, NMATRIX* right); static VALUE nm_multiply(VALUE left_v, VALUE right_v); static VALUE nm_det_exact(VALUE self); static VALUE nm_hessenberg(VALUE self, VALUE a); static VALUE nm_inverse(VALUE self, VALUE inverse, VALUE bang); static VALUE nm_inverse_exact(VALUE self, VALUE inverse, VALUE lda, VALUE ldb); static VALUE nm_complex_conjugate_bang(VALUE self); static VALUE nm_reshape_bang(VALUE self, VALUE arg); static nm::dtype_t interpret_dtype(int argc, VALUE* argv, nm::stype_t stype); static void* interpret_initial_value(VALUE arg, nm::dtype_t dtype); static size_t* interpret_shape(VALUE arg, size_t* dim); static nm::stype_t interpret_stype(VALUE arg); /* Singleton methods */ static VALUE nm_upcast(VALUE self, VALUE t1, VALUE t2); #ifdef BENCHMARK static double get_time(void); #endif /////////////////// // Ruby Bindings // /////////////////// void Init_nmatrix() { /////////////////////// // Class Definitions // /////////////////////// cNMatrix = rb_define_class("NMatrix", rb_cObject); // Special exceptions /* * Exception raised when there's a problem with data. */ nm_eDataTypeError = rb_define_class("DataTypeError", rb_eStandardError); /* * Exception raised when something goes wrong with the storage of a matrix. */ nm_eStorageTypeError = rb_define_class("StorageTypeError", rb_eStandardError); /* * Exception raise when the matrix shape is not appropriate for a given operation. */ nm_eShapeError = rb_define_class("ShapeError", rb_eStandardError); /* * Exception raise when an inverse is requested but the matrix is not invertible. */ nm_eNotInvertibleError = rb_define_class("NotInvertibleError", rb_eStandardError); /* * :nodoc: * Class that holds values in use by the C code. 
*/ cNMatrix_GC_holder = rb_define_class("NMGCHolder", rb_cObject); /////////////////// // Class Methods // /////////////////// rb_define_alloc_func(cNMatrix, nm_alloc); /////////////////////// // Singleton Methods // /////////////////////// rb_define_singleton_method(cNMatrix, "upcast", (METHOD)nm_upcast, 2); /* in ext/nmatrix/nmatrix.cpp */ rb_define_singleton_method(cNMatrix, "guess_dtype", (METHOD)nm_guess_dtype, 1); rb_define_singleton_method(cNMatrix, "min_dtype", (METHOD)nm_min_dtype, 1); ////////////////////// // Instance Methods // ////////////////////// rb_define_method(cNMatrix, "initialize", (METHOD)nm_init, -1); rb_define_method(cNMatrix, "initialize_copy", (METHOD)nm_init_copy, 1); rb_define_singleton_method(cNMatrix, "read", (METHOD)nm_read, -1); rb_define_method(cNMatrix, "write", (METHOD)nm_write, -1); // Technically, the following function is a copy constructor. rb_define_protected_method(cNMatrix, "clone_transpose", (METHOD)nm_init_transposed, 0); rb_define_method(cNMatrix, "dtype", (METHOD)nm_dtype, 0); rb_define_method(cNMatrix, "stype", (METHOD)nm_stype, 0); rb_define_method(cNMatrix, "cast_full", (METHOD)nm_cast, 3); rb_define_method(cNMatrix, "default_value", (METHOD)nm_default_value, 0); rb_define_protected_method(cNMatrix, "__list_default_value__", (METHOD)nm_list_default_value, 0); rb_define_protected_method(cNMatrix, "__yale_default_value__", (METHOD)nm_yale_default_value, 0); rb_define_method(cNMatrix, "[]", (METHOD)nm_mref, -1); rb_define_method(cNMatrix, "slice", (METHOD)nm_mget, -1); rb_define_method(cNMatrix, "[]=", (METHOD)nm_mset, -1); rb_define_method(cNMatrix, "is_ref?", (METHOD)nm_is_ref, 0); rb_define_method(cNMatrix, "dimensions", (METHOD)nm_dim, 0); rb_define_method(cNMatrix, "effective_dimensions", (METHOD)nm_effective_dim, 0); rb_define_protected_method(cNMatrix, "__list_to_hash__", (METHOD)nm_to_hash, 0); // handles list and dense, which are n-dimensional rb_define_method(cNMatrix, "shape", (METHOD)nm_shape, 0); 
rb_define_method(cNMatrix, "supershape", (METHOD)nm_supershape, 0); rb_define_method(cNMatrix, "offset", (METHOD)nm_offset, 0); rb_define_method(cNMatrix, "det_exact", (METHOD)nm_det_exact, 0); rb_define_method(cNMatrix, "complex_conjugate!", (METHOD)nm_complex_conjugate_bang, 0); rb_define_protected_method(cNMatrix, "reshape_bang", (METHOD)nm_reshape_bang, 1); // Iterators public methods rb_define_method(cNMatrix, "each_with_indices", (METHOD)nm_each_with_indices, 0); rb_define_method(cNMatrix, "each_stored_with_indices", (METHOD)nm_each_stored_with_indices, 0); rb_define_method(cNMatrix, "map_stored", (METHOD)nm_map_stored, 0); rb_define_method(cNMatrix, "each_ordered_stored_with_indices", (METHOD)nm_each_ordered_stored_with_indices, 0); // Iterators protected methods rb_define_protected_method(cNMatrix, "__dense_each__", (METHOD)nm_dense_each, 0); rb_define_protected_method(cNMatrix, "__dense_map__", (METHOD)nm_dense_map, 0); rb_define_protected_method(cNMatrix, "__dense_map_pair__", (METHOD)nm_dense_map_pair, 1); rb_define_protected_method(cNMatrix, "__list_map_merged_stored__", (METHOD)nm_list_map_merged_stored, 2); rb_define_protected_method(cNMatrix, "__list_map_stored__", (METHOD)nm_list_map_stored, 1); rb_define_protected_method(cNMatrix, "__yale_map_merged_stored__", (METHOD)nm_yale_map_merged_stored, 2); rb_define_protected_method(cNMatrix, "__yale_map_stored__", (METHOD)nm_yale_map_stored, 0); rb_define_protected_method(cNMatrix, "__yale_stored_diagonal_each_with_indices__", (METHOD)nm_yale_stored_diagonal_each_with_indices, 0); rb_define_protected_method(cNMatrix, "__yale_stored_nondiagonal_each_with_indices__", (METHOD)nm_yale_stored_nondiagonal_each_with_indices, 0); rb_define_method(cNMatrix, "==", (METHOD)nm_eqeq, 1); rb_define_method(cNMatrix, "+", (METHOD)nm_ew_add, 1); rb_define_method(cNMatrix, "-", (METHOD)nm_ew_subtract, 1); rb_define_method(cNMatrix, "*", (METHOD)nm_ew_multiply, 1); rb_define_method(cNMatrix, "/", (METHOD)nm_ew_divide, 1); 
rb_define_method(cNMatrix, "**", (METHOD)nm_ew_power, 1); rb_define_method(cNMatrix, "%", (METHOD)nm_ew_mod, 1); rb_define_method(cNMatrix, "atan2", (METHOD)nm_noncom_ew_atan2, -1); rb_define_method(cNMatrix, "ldexp", (METHOD)nm_noncom_ew_ldexp, -1); rb_define_method(cNMatrix, "hypot", (METHOD)nm_noncom_ew_hypot, -1); rb_define_method(cNMatrix, "sin", (METHOD)nm_unary_sin, 0); rb_define_method(cNMatrix, "cos", (METHOD)nm_unary_cos, 0); rb_define_method(cNMatrix, "tan", (METHOD)nm_unary_tan, 0); rb_define_method(cNMatrix, "asin", (METHOD)nm_unary_asin, 0); rb_define_method(cNMatrix, "acos", (METHOD)nm_unary_acos, 0); rb_define_method(cNMatrix, "atan", (METHOD)nm_unary_atan, 0); rb_define_method(cNMatrix, "sinh", (METHOD)nm_unary_sinh, 0); rb_define_method(cNMatrix, "cosh", (METHOD)nm_unary_cosh, 0); rb_define_method(cNMatrix, "tanh", (METHOD)nm_unary_tanh, 0); rb_define_method(cNMatrix, "asinh", (METHOD)nm_unary_asinh, 0); rb_define_method(cNMatrix, "acosh", (METHOD)nm_unary_acosh, 0); rb_define_method(cNMatrix, "atanh", (METHOD)nm_unary_atanh, 0); rb_define_method(cNMatrix, "exp", (METHOD)nm_unary_exp, 0); rb_define_method(cNMatrix, "log2", (METHOD)nm_unary_log2, 0); rb_define_method(cNMatrix, "log10", (METHOD)nm_unary_log10, 0); rb_define_method(cNMatrix, "sqrt", (METHOD)nm_unary_sqrt, 0); rb_define_method(cNMatrix, "erf", (METHOD)nm_unary_erf, 0); rb_define_method(cNMatrix, "erfc", (METHOD)nm_unary_erfc, 0); rb_define_method(cNMatrix, "cbrt", (METHOD)nm_unary_cbrt, 0); rb_define_method(cNMatrix, "gamma", (METHOD)nm_unary_gamma, 0); rb_define_method(cNMatrix, "log", (METHOD)nm_unary_log, -1); rb_define_method(cNMatrix, "-@", (METHOD)nm_unary_negate,0); rb_define_method(cNMatrix, "floor", (METHOD)nm_unary_floor, 0); rb_define_method(cNMatrix, "ceil", (METHOD)nm_unary_ceil, 0); rb_define_method(cNMatrix, "round", (METHOD)nm_unary_round, -1); rb_define_method(cNMatrix, "=~", (METHOD)nm_ew_eqeq, 1); rb_define_method(cNMatrix, "!~", (METHOD)nm_ew_neq, 1); 
rb_define_method(cNMatrix, "<=", (METHOD)nm_ew_leq, 1); rb_define_method(cNMatrix, ">=", (METHOD)nm_ew_geq, 1); rb_define_method(cNMatrix, "<", (METHOD)nm_ew_lt, 1); rb_define_method(cNMatrix, ">", (METHOD)nm_ew_gt, 1); ///////////////////////////// // Helper Instance Methods // ///////////////////////////// rb_define_protected_method(cNMatrix, "__yale_vector_set__", (METHOD)nm_vector_set, -1); ///////////////////////// // Matrix Math Methods // ///////////////////////// rb_define_method(cNMatrix, "dot", (METHOD)nm_multiply, 1); rb_define_method(cNMatrix, "symmetric?", (METHOD)nm_symmetric, 0); rb_define_method(cNMatrix, "hermitian?", (METHOD)nm_hermitian, 0); rb_define_method(cNMatrix, "capacity", (METHOD)nm_capacity, 0); // protected methods rb_define_protected_method(cNMatrix, "__inverse__", (METHOD)nm_inverse, 2); rb_define_protected_method(cNMatrix, "__inverse_exact__", (METHOD)nm_inverse_exact, 3); // private methods rb_define_private_method(cNMatrix, "__hessenberg__", (METHOD)nm_hessenberg, 1); ///////////////// // FFI Methods // ///////////////// rb_define_method(cNMatrix, "data_pointer", (METHOD)nm_data_pointer, 0); ///////////// // Aliases // ///////////// rb_define_alias(cNMatrix, "dim", "dimensions"); rb_define_alias(cNMatrix, "effective_dim", "effective_dimensions"); rb_define_alias(cNMatrix, "equal?", "eql?"); //////////// //Epsilons// //////////// rb_define_const(cNMatrix, "FLOAT64_EPSILON", rb_const_get(rb_cFloat, rb_intern("EPSILON"))); rb_define_const(cNMatrix, "FLOAT32_EPSILON", DBL2NUM(FLT_EPSILON)); /////////////////////// // Symbol Generation // /////////////////////// nm_init_ruby_constants(); ////////////////////////// // YaleFunctions module // ////////////////////////// nm_init_yale_functions(); ///////////////// // BLAS module // ///////////////// nm_math_init_blas(); /////////////// // IO module // /////////////// nm_init_io(); ///////////////////////////////////////////////// // Force compilation of necessary constructors // 
///////////////////////////////////////////////// nm_init_data(); } ////////////////// // Ruby Methods // ////////////////// /* * Allocator. */ static VALUE nm_alloc(VALUE klass) { NMATRIX* mat = NM_ALLOC(NMATRIX); mat->storage = NULL; // DO NOT MARK This STRUCT. It has no storage allocated, and no stype, so mark will do an invalid something. return Data_Wrap_Struct(klass, NULL, nm_delete, mat); } /* * Find the capacity of an NMatrix. The capacity only differs from the size for * Yale matrices, which occasionally allocate more space than they need. For * list and dense, capacity gives the number of elements in the matrix. * * If you call this on a slice, it may behave unpredictably. Most likely it'll * just return the original matrix's capacity. */ static VALUE nm_capacity(VALUE self) { NM_CONSERVATIVE(nm_register_value(&self)); VALUE cap; switch(NM_STYPE(self)) { case nm::YALE_STORE: cap = UINT2NUM(reinterpret_cast(NM_STORAGE_YALE(self)->src)->capacity); break; case nm::DENSE_STORE: cap = UINT2NUM(nm_storage_count_max_elements( NM_STORAGE_DENSE(self) )); break; case nm::LIST_STORE: cap = UINT2NUM(nm_list_storage_count_elements( NM_STORAGE_LIST(self) )); break; default: NM_CONSERVATIVE(nm_unregister_value(&self)); rb_raise(nm_eStorageTypeError, "unrecognized stype in nm_capacity()"); } NM_CONSERVATIVE(nm_unregister_value(&self)); return cap; } /* * Mark function. */ void nm_mark(NMATRIX* mat) { STYPE_MARK_TABLE(mark) mark[mat->stype](mat->storage); } /* * Destructor. */ void nm_delete(NMATRIX* mat) { static void (*ttable[nm::NUM_STYPES])(STORAGE*) = { nm_dense_storage_delete, nm_list_storage_delete, nm_yale_storage_delete }; ttable[mat->stype](mat->storage); NM_FREE(mat); } /* * Slicing destructor. 
*/ void nm_delete_ref(NMATRIX* mat) { static void (*ttable[nm::NUM_STYPES])(STORAGE*) = { nm_dense_storage_delete_ref, nm_list_storage_delete_ref, nm_yale_storage_delete_ref }; ttable[mat->stype](mat->storage); NM_FREE(mat); } /** * These variables hold a linked list of VALUEs that are registered to be in * use by nmatrix so that they can be marked when GC runs. */ static VALUE* gc_value_holder = NULL; static NM_GC_HOLDER* gc_value_holder_struct = NULL; static NM_GC_HOLDER* allocated_pool = NULL; // an object pool for linked list nodes; using pooling is in some cases a substantial performance improvement /** * GC Marking function for the values that have been registered. */ static void __nm_mark_value_container(NM_GC_HOLDER* gc_value_holder_struct) { if (gc_value_holder_struct && gc_value_holder_struct->start) { NM_GC_LL_NODE* curr = gc_value_holder_struct->start; while (curr) { rb_gc_mark_locations(curr->val, curr->val + curr->n); curr = curr->next; } } } /** * Initilalizes the linked list of in-use VALUEs if it hasn't been done * already. */ static void __nm_initialize_value_container() { if (gc_value_holder == NULL) { gc_value_holder_struct = NM_ALLOC_NONRUBY(NM_GC_HOLDER); allocated_pool = NM_ALLOC_NONRUBY(NM_GC_HOLDER); gc_value_holder = NM_ALLOC_NONRUBY(VALUE); gc_value_holder_struct->start = NULL; allocated_pool->start = NULL; *gc_value_holder = Data_Wrap_Struct(cNMatrix_GC_holder, __nm_mark_value_container, NULL, gc_value_holder_struct); rb_global_variable(gc_value_holder); } } /* * Register an array of VALUEs to avoid their collection * while using them internally. 
*/ void nm_register_values(VALUE* values, size_t n) { if (!gc_value_holder_struct) __nm_initialize_value_container(); if (values) { NM_GC_LL_NODE* to_insert = NULL; if (allocated_pool->start) { to_insert = allocated_pool->start; allocated_pool->start = to_insert->next; } else { to_insert = NM_ALLOC_NONRUBY(NM_GC_LL_NODE); } to_insert->val = values; to_insert->n = n; to_insert->next = gc_value_holder_struct->start; gc_value_holder_struct->start = to_insert; } } /* * Unregister an array of VALUEs with the gc to allow normal * garbage collection to occur again. */ void nm_unregister_values(VALUE* values, size_t n) { if (values) { if (gc_value_holder_struct) { NM_GC_LL_NODE* curr = gc_value_holder_struct->start; NM_GC_LL_NODE* last = NULL; while (curr) { if (curr->val == values) { if (last) { last->next = curr->next; } else { gc_value_holder_struct->start = curr->next; } curr->next = allocated_pool->start; curr->val = NULL; curr->n = 0; allocated_pool->start = curr; break; } last = curr; curr = curr->next; } } } } /** * Register a single VALUE as in use to avoid garbage collection. */ void nm_register_value(VALUE* val) { nm_register_values(val, 1); } /** * Unregister a single VALUE to allow normal garbage collection. */ void nm_unregister_value(VALUE* val) { nm_unregister_values(val, 1); } /** * Removes all instances of a single VALUE in the gc list. This can be * dangerous. Primarily used when something is about to be * freed and replaced so that and residual registrations won't access after * free. 
**/ void nm_completely_unregister_value(VALUE* val) { if (gc_value_holder_struct) { NM_GC_LL_NODE* curr = gc_value_holder_struct->start; NM_GC_LL_NODE* last = NULL; while (curr) { if (curr->val == val) { if (last) { last->next = curr->next; } else { gc_value_holder_struct->start = curr->next; } NM_GC_LL_NODE* temp_next = curr->next; curr->next = allocated_pool->start; curr->val = NULL; curr->n = 0; allocated_pool->start = curr; curr = temp_next; } else { last = curr; curr = curr->next; } } } } /** * Register a STORAGE struct of the supplied stype to avoid garbage collection * of its internals. * * Delegates to the storage-specific methods. They will check dtype and ignore * non-rubyobject dtypes, so it's safe to pass any storage in. */ void nm_register_storage(nm::stype_t stype, const STORAGE* storage) { STYPE_REGISTER_TABLE(ttable); ttable[stype](storage); } /** * Unregister a STORAGE struct of the supplied stype to allow normal garbage collection * of its internals. * * Delegates to the storage-specific methods. They will check dtype and ignore * non-rubyobject dtypes, so it's safe to pass any storage in. * */ void nm_unregister_storage(nm::stype_t stype, const STORAGE* storage) { STYPE_UNREGISTER_TABLE(ttable); ttable[stype](storage); } /** * Registers an NMATRIX struct to avoid garbage collection of its internals. */ void nm_register_nmatrix(NMATRIX* nmatrix) { if (nmatrix) nm_register_storage(nmatrix->stype, nmatrix->storage); } /** * Unregisters an NMATRIX struct to avoid garbage collection of its internals. */ void nm_unregister_nmatrix(NMATRIX* nmatrix) { if (nmatrix) nm_unregister_storage(nmatrix->stype, nmatrix->storage); } /* * call-seq: * dtype -> Symbol * * Get the data type (dtype) of a matrix, e.g., :byte, :int8, :int16, :int32, * :int64, :float32, :float64, :complex64, :complex128, * or :object (the last is a Ruby object). 
*/
static VALUE nm_dtype(VALUE self) {
  // Look up the dtype's name and hand it back as a Ruby symbol.
  return ID2SYM(rb_intern(DTYPE_NAMES[NM_DTYPE(self)]));
}

/*
 * call-seq:
 *     upcast(first_dtype, second_dtype) -> Symbol
 *
 * Given a binary operation between types t1 and t2, what type will be returned?
 *
 * This is a singleton method on NMatrix, e.g., NMatrix.upcast(:int32, :int64)
 */
static VALUE nm_upcast(VALUE self, VALUE t1, VALUE t2) {
  nm::dtype_t d1 = nm_dtype_from_rbsymbol(t1);
  nm::dtype_t d2 = nm_dtype_from_rbsymbol(t2);

  // The Upcast table encodes the dtype promotion rules.
  nm::dtype_t promoted = Upcast[d1][d2];
  return ID2SYM(rb_intern( DTYPE_NAMES[ promoted ] ));
}

/*
 * call-seq: default_value -> ...
 *
 * Get the default value for the matrix. For dense, this is undefined and will
 * return Qnil. For list, it is user-defined. For yale, it's going to be some
 * variation on zero, but may be Qfalse or Qnil.
 */
static VALUE nm_default_value(VALUE self) {
  nm::stype_t stype = NM_STYPE(self);
  if (stype == nm::YALE_STORE)      return nm_yale_default_value(self);
  else if (stype == nm::LIST_STORE) return nm_list_default_value(self);
  else                              return Qnil; // dense: undefined
}

/*
 * call-seq:
 *     each_with_indices -> Enumerator
 *
 * Iterate over all entries of any matrix in standard storage order (as with
 * #each), and include the indices.
 */
static VALUE nm_each_with_indices(VALUE nmatrix) {
  NM_CONSERVATIVE(nm_register_value(&nmatrix));
  VALUE result = Qnil;

  switch(NM_STYPE(nmatrix)) {
  case nm::YALE_STORE:
    result = nm_yale_each_with_indices(nmatrix);
    break;
  case nm::DENSE_STORE:
    result = nm_dense_each_with_indices(nmatrix);
    break;
  case nm::LIST_STORE:
    result = nm_list_each_with_indices(nmatrix, false);
    break;
  default:
    // Unregister before raising: rb_raise does not return.
    NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
    rb_raise(nm_eDataTypeError, "Not a proper storage type");
  }

  NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
  return result;
}

/*
 * call-seq:
 *     each_stored_with_indices -> Enumerator
 *
 * Iterate over the stored entries of any matrix. For dense and yale, this iterates over non-zero
 * entries; for list, this iterates over non-default entries.
Yields dim+1 values for each entry:
 * i, j, ..., and the entry itself.
 */
static VALUE nm_each_stored_with_indices(VALUE nmatrix) {
  NM_CONSERVATIVE(nm_register_value(&nmatrix));
  VALUE result = Qnil;

  switch(NM_STYPE(nmatrix)) {
  case nm::YALE_STORE:
    result = nm_yale_each_stored_with_indices(nmatrix);
    break;
  case nm::DENSE_STORE:
    // Dense has no "stored" subset; fall back to every entry.
    result = nm_dense_each_with_indices(nmatrix);
    break;
  case nm::LIST_STORE:
    result = nm_list_each_with_indices(nmatrix, true);
    break;
  default:
    // Unregister before raising: rb_raise does not return.
    NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
    rb_raise(nm_eDataTypeError, "Not a proper storage type");
  }

  NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
  return result;
}

/*
 * call-seq:
 *     map_stored -> Enumerator
 *
 * Iterate over the stored entries of any matrix. For dense and yale, this
 * iterates over non-zero entries; for list, this iterates over non-default
 * entries. Yields dim+1 values for each entry: i, j, ..., and the entry itself.
 */
static VALUE nm_map_stored(VALUE nmatrix) {
  NM_CONSERVATIVE(nm_register_value(&nmatrix));
  VALUE result = Qnil;

  switch(NM_STYPE(nmatrix)) {
  case nm::YALE_STORE:
    result = nm_yale_map_stored(nmatrix);
    break;
  case nm::DENSE_STORE:
    result = nm_dense_map(nmatrix);
    break;
  case nm::LIST_STORE:
    result = nm_list_map_stored(nmatrix, Qnil);
    break;
  default:
    NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
    rb_raise(nm_eDataTypeError, "Not a proper storage type");
  }

  NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
  return result;
}

/*
 * call-seq:
 *     each_ordered_stored_with_indices -> Enumerator
 *
 * Very similar to #each_stored_with_indices. The key difference is that it enforces matrix ordering rather
 * than storage ordering, which only matters if your matrix is Yale.
*/ static VALUE nm_each_ordered_stored_with_indices(VALUE nmatrix) { NM_CONSERVATIVE(nm_register_value(&nmatrix)); VALUE to_return = Qnil; switch(NM_STYPE(nmatrix)) { case nm::YALE_STORE: to_return = nm_yale_each_ordered_stored_with_indices(nmatrix); break; case nm::DENSE_STORE: to_return = nm_dense_each_with_indices(nmatrix); break; case nm::LIST_STORE: to_return = nm_list_each_with_indices(nmatrix, true); break; default: NM_CONSERVATIVE(nm_unregister_value(&nmatrix)); rb_raise(nm_eDataTypeError, "Not a proper storage type"); } NM_CONSERVATIVE(nm_unregister_value(&nmatrix)); return to_return; } /* * Equality operator. Returns a single true or false value indicating whether * the matrices are equivalent. * * For elementwise, use =~ instead. * * This method will raise an exception if dimensions do not match. * * When stypes differ, this function calls a protected Ruby method. */ static VALUE nm_eqeq(VALUE left, VALUE right) { NM_CONSERVATIVE(nm_register_value(&left)); NM_CONSERVATIVE(nm_register_value(&right)); NMATRIX *l, *r; CheckNMatrixType(left); CheckNMatrixType(right); UnwrapNMatrix(left, l); UnwrapNMatrix(right, r); bool result = false; // Check that the shapes match before going any further. 
if (l->storage->dim != r->storage->dim) { NM_CONSERVATIVE(nm_unregister_value(&left)); NM_CONSERVATIVE(nm_unregister_value(&right)); rb_raise(nm_eShapeError, "cannot compare matrices with different dimension"); } size_t dim = l->storage->dim; for (size_t i=0; istorage->shape[i] != r->storage->shape[i]) { NM_CONSERVATIVE(nm_unregister_value(&left)); NM_CONSERVATIVE(nm_unregister_value(&right)); rb_raise(nm_eShapeError, "cannot compare matrices with different shapes"); } } if (l->stype != r->stype) { // DIFFERENT STYPES if (l->stype == nm::DENSE_STORE) result = rb_funcall(left, rb_intern("dense_eql_sparse?"), 1, right); else if (r->stype == nm::DENSE_STORE) result = rb_funcall(right, rb_intern("dense_eql_sparse?"), 1, left); else result = rb_funcall(left, rb_intern("sparse_eql_sparse?"), 1, right); } else { switch(l->stype) { // SAME STYPES case nm::DENSE_STORE: result = nm_dense_storage_eqeq(l->storage, r->storage); break; case nm::LIST_STORE: result = nm_list_storage_eqeq(l->storage, r->storage); break; case nm::YALE_STORE: result = nm_yale_storage_eqeq(l->storage, r->storage); break; } } NM_CONSERVATIVE(nm_unregister_value(&left)); NM_CONSERVATIVE(nm_unregister_value(&right)); return result ? 
Qtrue : Qfalse; } DEF_ELEMENTWISE_RUBY_ACCESSOR(ADD, add) DEF_ELEMENTWISE_RUBY_ACCESSOR(SUB, subtract) DEF_ELEMENTWISE_RUBY_ACCESSOR(MUL, multiply) DEF_ELEMENTWISE_RUBY_ACCESSOR(DIV, divide) DEF_ELEMENTWISE_RUBY_ACCESSOR(POW, power) DEF_ELEMENTWISE_RUBY_ACCESSOR(MOD, mod) DEF_ELEMENTWISE_RUBY_ACCESSOR(EQEQ, eqeq) DEF_ELEMENTWISE_RUBY_ACCESSOR(NEQ, neq) DEF_ELEMENTWISE_RUBY_ACCESSOR(LEQ, leq) DEF_ELEMENTWISE_RUBY_ACCESSOR(GEQ, geq) DEF_ELEMENTWISE_RUBY_ACCESSOR(LT, lt) DEF_ELEMENTWISE_RUBY_ACCESSOR(GT, gt) DEF_UNARY_RUBY_ACCESSOR(SIN, sin) DEF_UNARY_RUBY_ACCESSOR(COS, cos) DEF_UNARY_RUBY_ACCESSOR(TAN, tan) DEF_UNARY_RUBY_ACCESSOR(ASIN, asin) DEF_UNARY_RUBY_ACCESSOR(ACOS, acos) DEF_UNARY_RUBY_ACCESSOR(ATAN, atan) DEF_UNARY_RUBY_ACCESSOR(SINH, sinh) DEF_UNARY_RUBY_ACCESSOR(COSH, cosh) DEF_UNARY_RUBY_ACCESSOR(TANH, tanh) DEF_UNARY_RUBY_ACCESSOR(ASINH, asinh) DEF_UNARY_RUBY_ACCESSOR(ACOSH, acosh) DEF_UNARY_RUBY_ACCESSOR(ATANH, atanh) DEF_UNARY_RUBY_ACCESSOR(EXP, exp) DEF_UNARY_RUBY_ACCESSOR(LOG2, log2) DEF_UNARY_RUBY_ACCESSOR(LOG10, log10) DEF_UNARY_RUBY_ACCESSOR(SQRT, sqrt) DEF_UNARY_RUBY_ACCESSOR(ERF, erf) DEF_UNARY_RUBY_ACCESSOR(ERFC, erfc) DEF_UNARY_RUBY_ACCESSOR(CBRT, cbrt) DEF_UNARY_RUBY_ACCESSOR(GAMMA, gamma) DEF_UNARY_RUBY_ACCESSOR(NEGATE, negate) DEF_UNARY_RUBY_ACCESSOR(FLOOR, floor) DEF_UNARY_RUBY_ACCESSOR(CEIL, ceil) DEF_NONCOM_ELEMENTWISE_RUBY_ACCESSOR(ATAN2, atan2) DEF_NONCOM_ELEMENTWISE_RUBY_ACCESSOR(LDEXP, ldexp) DEF_NONCOM_ELEMENTWISE_RUBY_ACCESSOR(HYPOT, hypot) static VALUE nm_unary_log(int argc, VALUE* argv, VALUE self) { NM_CONSERVATIVE(nm_register_values(argv, argc)); const double default_log_base = exp(1.0); NMATRIX* left; UnwrapNMatrix(self, left); std::string sym; switch(left->stype) { case nm::DENSE_STORE: sym = "__dense_unary_log__"; break; case nm::YALE_STORE: sym = "__yale_unary_log__"; break; case nm::LIST_STORE: sym = "__list_unary_log__"; break; } NM_CONSERVATIVE(nm_unregister_values(argv, argc)); if (argc > 0) { //supplied a base return 
rb_funcall(self, rb_intern(sym.c_str()), 1, argv[0]); } return rb_funcall(self, rb_intern(sym.c_str()), 1, nm::RubyObject(default_log_base).rval); } static VALUE nm_unary_round(int argc, VALUE* argv, VALUE self) { NM_CONSERVATIVE(nm_register_values(argv, argc)); const int default_precision = 0; NMATRIX* left; UnwrapNMatrix(self, left); std::string sym; switch(left->stype) { case nm::DENSE_STORE: sym = "__dense_unary_round__"; break; case nm::YALE_STORE: sym = "__yale_unary_round__"; break; case nm::LIST_STORE: sym = "__list_unary_round__"; break; } NM_CONSERVATIVE(nm_unregister_values(argv, argc)); if (argc > 0) { //supplied precision return rb_funcall(self, rb_intern(sym.c_str()), 1, argv[0]); } return rb_funcall(self, rb_intern(sym.c_str()), 1, nm::RubyObject(default_precision).rval); } //DEF_ELEMENTWISE_RUBY_ACCESSOR(ATAN2, atan2) //DEF_ELEMENTWISE_RUBY_ACCESSOR(LDEXP, ldexp) //DEF_ELEMENTWISE_RUBY_ACCESSOR(HYPOT, hypot) /* * call-seq: * hermitian? -> Boolean * * Is this matrix hermitian? * * Definition: http://en.wikipedia.org/wiki/Hermitian_matrix * * For non-complex matrices, this function should return the same result as symmetric?. */ static VALUE nm_hermitian(VALUE self) { return is_symmetric(self, true); } /* * call-seq: * complex_conjugate_bang -> NMatrix * * Transform the matrix (in-place) to its complex conjugate. Only works on complex matrices. * * Bang should imply that no copy is being made, even temporarily. 
*/ static VALUE nm_complex_conjugate_bang(VALUE self) { NMATRIX* m; void* elem; size_t size, p; UnwrapNMatrix(self, m); if (m->stype == nm::DENSE_STORE) { size = nm_storage_count_max_elements(NM_STORAGE(self)); elem = NM_STORAGE_DENSE(self)->elements; } else if (m->stype == nm::YALE_STORE) { size = nm_yale_storage_get_size(NM_STORAGE_YALE(self)); elem = NM_STORAGE_YALE(self)->a; } else { rb_raise(rb_eNotImpError, "please cast to yale or dense (complex) first"); } // Walk through and negate the imaginary component if (NM_DTYPE(self) == nm::COMPLEX64) { for (p = 0; p < size; ++p) { reinterpret_cast(elem)[p].i = -reinterpret_cast(elem)[p].i; } } else if (NM_DTYPE(self) == nm::COMPLEX128) { for (p = 0; p < size; ++p) { reinterpret_cast(elem)[p].i = -reinterpret_cast(elem)[p].i; } } return self; } /* * call-seq: * __reshape!__ -> NMatrix * * Reshapes the matrix (in-place) to the desired shape. Note that this function does not do a resize; the product of * the new and old shapes' components must be equal. * */ static VALUE nm_reshape_bang(VALUE self, VALUE arg){ NMATRIX* m; UnwrapNMatrix(self, m); if(m->stype == nm::DENSE_STORE){ DENSE_STORAGE* s = NM_STORAGE_DENSE(self); VALUE shape_ary = arg; size_t dim; size_t size = nm_storage_count_max_elements(s); size_t new_size = 1; size_t* shape = interpret_shape(shape_ary, &dim); for (size_t index = 0; index < dim; ++index){ new_size *= shape[index];} if (size == new_size){ s->shape = shape; s->dim = dim; NM_FREE(s->offset); s->offset = NM_ALLOC_N(size_t, dim); memset(s->offset, 0, sizeof(size_t)*dim); size_t i, j; size_t* stride = NM_ALLOC_N(size_t, dim); for (i = 0; i < dim; ++i) { stride[i] = 1; for (j = i+1; j < dim; ++j) { stride[i] *= shape[j]; } } NM_FREE(s->stride); s->stride = stride; return self; } else rb_raise(rb_eArgError, "reshape cannot resize; size of new and old matrices must match"); } else { rb_raise(rb_eNotImpError, "reshape in place only for dense stype"); } } /* * Helper function for creating a matrix. 
You have to create the storage and pass it in, but you don't * need to worry about deleting it. */ NMATRIX* nm_create(nm::stype_t stype, STORAGE* storage) { nm_register_storage(stype, storage); NMATRIX* mat = NM_ALLOC(NMATRIX); mat->stype = stype; mat->storage = storage; nm_unregister_storage(stype, storage); return mat; } /* * @see nm_init */ static VALUE nm_init_new_version(int argc, VALUE* argv, VALUE self) { NM_CONSERVATIVE(nm_register_values(argv, argc)); NM_CONSERVATIVE(nm_register_value(&self)); VALUE shape_ary, initial_ary, hash; //VALUE shape_ary, default_val, capacity, initial_ary, dtype_sym, stype_sym; // Mandatory args: shape, dtype, stype rb_scan_args(argc, argv, "11:", &shape_ary, &initial_ary, &hash); // &stype_sym, &dtype_sym, &default_val, &capacity); NM_CONSERVATIVE(nm_register_value(&shape_ary)); NM_CONSERVATIVE(nm_register_value(&initial_ary)); NM_CONSERVATIVE(nm_register_value(&hash)); // Get the shape. size_t dim; size_t* shape = interpret_shape(shape_ary, &dim); void* init; void* v = NULL; size_t v_size = 0; nm::stype_t stype = nm::DENSE_STORE; nm::dtype_t dtype = nm::RUBYOBJ; VALUE dtype_sym = Qnil, stype_sym = Qnil, default_val_num = Qnil, capacity_num = Qnil; size_t capacity = 0; if (!NIL_P(hash)) { dtype_sym = rb_hash_aref(hash, ID2SYM(nm_rb_dtype)); stype_sym = rb_hash_aref(hash, ID2SYM(nm_rb_stype)); capacity_num = rb_hash_aref(hash, ID2SYM(nm_rb_capacity)); NM_CONSERVATIVE(nm_register_value(&capacity_num)); default_val_num = rb_hash_aref(hash, ID2SYM(nm_rb_default)); NM_CONSERVATIVE(nm_register_value(&default_val_num)); } // stype ||= :dense stype = !NIL_P(stype_sym) ? nm_stype_from_rbsymbol(stype_sym) : nm::DENSE_STORE; // dtype ||= h[:dtype] || guess_dtype(initial_ary) || :object if (NIL_P(initial_ary) && NIL_P(dtype_sym)) dtype = nm::RUBYOBJ; else if (NIL_P(dtype_sym)) dtype = nm_dtype_guess(initial_ary); else dtype = nm_dtype_from_rbsymbol(dtype_sym); // if stype != :dense // if initial_ary.nil? 
// init = h[:default] || 0 // elsif initial_ary.is_a?(Array) // init = initial_ary.size > 1 ? (h[:default] || 0) : initial_ary[0] // else // init = initial_ary # not an array, just a value // end // end if (stype != nm::DENSE_STORE) { if (!NIL_P(default_val_num)) init = rubyobj_to_cval(default_val_num, dtype); else if (NIL_P(initial_ary)) init = NULL; else if (RB_TYPE_P(initial_ary, T_ARRAY)) init = RARRAY_LEN(initial_ary) == 1 ? rubyobj_to_cval(rb_ary_entry(initial_ary, 0), dtype) : NULL; else init = rubyobj_to_cval(initial_ary, dtype); if (dtype == nm::RUBYOBJ) { nm_register_values(reinterpret_cast(init), 1); } } // capacity = h[:capacity] || 0 if (stype == nm::YALE_STORE) { if (!NIL_P(capacity_num)) capacity = FIX2INT(capacity_num); } if (!NIL_P(initial_ary)) { if (RB_TYPE_P(initial_ary, T_ARRAY)) v_size = RARRAY_LEN(initial_ary); else v_size = 1; v = interpret_initial_value(initial_ary, dtype); if (dtype == nm::RUBYOBJ) { nm_register_values(reinterpret_cast(v), v_size); } } // :object matrices MUST be initialized. else if (stype == nm::DENSE_STORE && dtype == nm::RUBYOBJ) { // Pretend [nil] was passed for RUBYOBJ. v = NM_ALLOC(VALUE); *(VALUE*)v = Qnil; v_size = 1; } NMATRIX* nmatrix; UnwrapNMatrix(self, nmatrix); nmatrix->stype = stype; switch (stype) { case nm::DENSE_STORE: nmatrix->storage = (STORAGE*)nm_dense_storage_create(dtype, shape, dim, v, v_size); break; case nm::LIST_STORE: nmatrix->storage = (STORAGE*)nm_list_storage_create(dtype, shape, dim, init); break; case nm::YALE_STORE: nmatrix->storage = (STORAGE*)nm_yale_storage_create(dtype, shape, dim, capacity); nm_yale_storage_init((YALE_STORAGE*)(nmatrix->storage), init); break; } nm_register_storage(stype, nmatrix->storage); // If we're not creating a dense, and an initial array was provided, use that and multi-slice-set // to set the contents of the matrix right now. 
if (stype != nm::DENSE_STORE && v_size > 1) { VALUE* slice_argv = NM_ALLOCA_N(VALUE, dim); nm_register_values(slice_argv, dim); size_t* tmp_shape = NM_ALLOC_N(size_t, dim); for (size_t m = 0; m < dim; ++m) { slice_argv[m] = ID2SYM(nm_rb_mul); // :* -- full range tmp_shape[m] = shape[m]; } SLICE slice_s; SLICE* slice = &slice_s; slice->coords = NM_ALLOCA_N(size_t, dim); slice->lengths = NM_ALLOCA_N(size_t, dim); init_slice_no_alloc(slice, dim, dim, slice_argv, shape); // Create a temporary dense matrix and use it to do a slice assignment on self. NMATRIX* tmp = nm_create(nm::DENSE_STORE, (STORAGE*)nm_dense_storage_create(dtype, tmp_shape, dim, v, v_size)); nm_register_nmatrix(tmp); VALUE rb_tmp = Data_Wrap_Struct(CLASS_OF(self), nm_mark, nm_delete, tmp); nm_unregister_nmatrix(tmp); nm_register_value(&rb_tmp); if (stype == nm::YALE_STORE) nm_yale_storage_set(self, slice, rb_tmp); else nm_list_storage_set(self, slice, rb_tmp); // We need to free v if it's not the same size as tmp -- because tmp will have made a copy instead. //if (nm_storage_count_max_elements(tmp->storage) != v_size) // NM_FREE(v); // nm_delete(tmp); // This seems to enrage the garbage collector (because rb_tmp is still available). It'd be better if we could force it to free immediately, but no sweat. 
nm_unregister_value(&rb_tmp); nm_unregister_values(slice_argv, dim); } if (!NIL_P(initial_ary) && dtype == nm::RUBYOBJ) { nm_unregister_values(reinterpret_cast(v), v_size); } if (stype != nm::DENSE_STORE && dtype == nm::RUBYOBJ) { nm_unregister_values(reinterpret_cast(init), 1); } if (!NIL_P(hash)) { NM_CONSERVATIVE(nm_unregister_value(&capacity_num)); NM_CONSERVATIVE(nm_unregister_value(&default_val_num)); } NM_CONSERVATIVE(nm_unregister_value(&shape_ary)); NM_CONSERVATIVE(nm_unregister_value(&initial_ary)); NM_CONSERVATIVE(nm_unregister_value(&hash)); NM_CONSERVATIVE(nm_unregister_value(&self)); NM_CONSERVATIVE(nm_unregister_values(argv, argc)); nm_unregister_storage(stype, nmatrix->storage); return self; } /* * call-seq: * new(shape) -> NMatrix * new(shape, initial_value) -> NMatrix * new(shape, initial_array) -> NMatrix * new(shape, initial_value, options) -> NMatrix * new(shape, initial_array, options) -> NMatrix * * Create a new NMatrix. * * The only mandatory argument is shape, which may be a positive integer or an array of positive integers. * * It is recommended that you supply an initialization value or array of values. Without one, Yale and List matrices will * be initialized to 0; and dense matrices will be undefined. * * Additional options may be provided using keyword arguments. The keywords are +:dtype, +:stype+, +:capacity+, and * +:default+. Only Yale uses a capacity argument, which is used to reserve the initial size of its storage vectors. * List and Yale both accept a default value (which itself defaults to 0). This default is taken from the initial value * if such a value is given; it is more likely to be required when an initial array is provided. * * The storage type, or stype, is used to specify whether we want a +:dense+, +:list+, or +:yale+ matrix; dense is the * default. * * The data type, or dtype, can be one of: :byte, :int8, :int16, :int32, :int64, :float32, :float64, :complex64, * :complex128, or :object. 
The constructor will attempt to guess it from the initial value/array/default * provided, if any. Otherwise, the default is :object, which stores any type of Ruby object. * * In addition to the above, there is a legacy constructor from the alpha version. To use that version, you must be * providing exactly four arguments. It is now deprecated. * * There is one additional constructor for advanced users, which takes seven arguments and is only for creating Yale * matrices with known IA, JA, and A arrays. This is used primarily internally for IO, e.g., reading Matlab matrices, * which are stored in old Yale (not our Yale) format. But be careful; there are no overflow warnings. All of these * constructors are defined for power-users. Everyone else should probably resort to the shortcut functions defined in * shortcuts.rb. */ static VALUE nm_init(int argc, VALUE* argv, VALUE nm) { NM_CONSERVATIVE(nm_register_value(&nm)); NM_CONSERVATIVE(nm_register_values(argv, argc)); if (argc <= 3) { // Call the new constructor unless all four arguments are given (or the 7-arg version is given) NM_CONSERVATIVE(nm_unregister_values(argv, argc)); NM_CONSERVATIVE(nm_unregister_value(&nm)); return nm_init_new_version(argc, argv, nm); } /* First, determine stype (dense by default) */ nm::stype_t stype; size_t offset = 0; if (!SYMBOL_P(argv[0]) && !RB_TYPE_P(argv[0], T_STRING)) { stype = nm::DENSE_STORE; } else { // 0: String or Symbol stype = interpret_stype(argv[0]); offset = 1; } // If there are 7 arguments and Yale, refer to a different init function with fewer sanity checks. 
if (argc == 7) { if (stype == nm::YALE_STORE) { NM_CONSERVATIVE(nm_unregister_values(argv, argc)); NM_CONSERVATIVE(nm_unregister_value(&nm)); return nm_init_yale_from_old_yale(argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], nm); } else { NM_CONSERVATIVE(nm_unregister_values(argv, argc)); NM_CONSERVATIVE(nm_unregister_value(&nm)); rb_raise(rb_eArgError, "Expected 2-4 arguments (or 7 for internal Yale creation)"); } } // 1: Array or Fixnum size_t dim; size_t* shape = interpret_shape(argv[offset], &dim); // 2-3: dtype nm::dtype_t dtype = interpret_dtype(argc-1-offset, argv+offset+1, stype); size_t init_cap = 0, init_val_len = 0; void* init_val = NULL; if (!SYMBOL_P(argv[1+offset]) || RB_TYPE_P(argv[1+offset], T_ARRAY)) { // Initial value provided (could also be initial capacity, if yale). if (stype == nm::YALE_STORE && NM_RUBYVAL_IS_NUMERIC(argv[1+offset])) { init_cap = FIX2UINT(argv[1+offset]); } else { // 4: initial value / dtype init_val = interpret_initial_value(argv[1+offset], dtype); if (RB_TYPE_P(argv[1+offset], T_ARRAY)) init_val_len = RARRAY_LEN(argv[1+offset]); else init_val_len = 1; } } else { // DType is RUBYOBJ. if (stype == nm::DENSE_STORE) { /* * No need to initialize dense with any kind of default value unless it's * an RUBYOBJ matrix. */ if (dtype == nm::RUBYOBJ) { // Pretend [nil] was passed for RUBYOBJ. init_val = NM_ALLOC(VALUE); *(VALUE*)init_val = Qnil; init_val_len = 1; } else { init_val = NULL; } } else if (stype == nm::LIST_STORE) { init_val = NM_ALLOC_N(char, DTYPE_SIZES[dtype]); std::memset(init_val, 0, DTYPE_SIZES[dtype]); } } if (dtype == nm::RUBYOBJ) { nm_register_values(reinterpret_cast(init_val), init_val_len); } // TODO: Update to allow an array as the initial value. 
NMATRIX* nmatrix; UnwrapNMatrix(nm, nmatrix); nmatrix->stype = stype; switch (stype) { case nm::DENSE_STORE: nmatrix->storage = (STORAGE*)nm_dense_storage_create(dtype, shape, dim, init_val, init_val_len); break; case nm::LIST_STORE: nmatrix->storage = (STORAGE*)nm_list_storage_create(dtype, shape, dim, init_val); break; case nm::YALE_STORE: nmatrix->storage = (STORAGE*)nm_yale_storage_create(dtype, shape, dim, init_cap); nm_yale_storage_init((YALE_STORAGE*)(nmatrix->storage), NULL); break; } if (dtype == nm::RUBYOBJ) { nm_unregister_values(reinterpret_cast(init_val), init_val_len); } NM_CONSERVATIVE(nm_unregister_values(argv, argc)); NM_CONSERVATIVE(nm_unregister_value(&nm)); return nm; } /* * Helper for nm_cast_with_types which uses the C types instead of the Ruby objects. * Called by nm_cast_with_types. */ NMATRIX* nm_cast_with_ctype_args(NMATRIX* self, nm::stype_t new_stype, nm::dtype_t new_dtype, void* init_ptr) { nm_register_nmatrix(self); NMATRIX* lhs = NM_ALLOC(NMATRIX); lhs->stype = new_stype; // Copy the storage CAST_TABLE(cast_copy); lhs->storage = cast_copy[lhs->stype][self->stype](self->storage, new_dtype, init_ptr); nm_unregister_nmatrix(self); return lhs; } /* * Cast NMatrix with given new_stype and new_dtype. Called by nm_cast. */ VALUE nm_cast_with_types(VALUE self, nm::stype_t new_stype, nm::dtype_t new_dtype, void* init_ptr) { NMATRIX *rhs; UnwrapNMatrix( self, rhs ); NMATRIX* m = nm_cast_with_ctype_args(rhs, new_stype, new_dtype, init_ptr); nm_register_nmatrix(m); VALUE to_return = Data_Wrap_Struct(CLASS_OF(self), nm_mark, nm_delete, m); nm_unregister_nmatrix(m); return to_return; } /* * call-seq: * cast_full(stype) -> NMatrix * cast_full(stype, dtype, sparse_basis) -> NMatrix * * Copy constructor for changing dtypes and stypes. 
*/
VALUE nm_cast(VALUE self, VALUE new_stype_symbol, VALUE new_dtype_symbol, VALUE init) {
  NM_CONSERVATIVE(nm_register_value(&self));
  NM_CONSERVATIVE(nm_register_value(&init));

  // Decode the requested target dtype/stype from their Ruby symbols.
  nm::dtype_t new_dtype = nm_dtype_from_rbsymbol(new_dtype_symbol);
  nm::stype_t new_stype = nm_stype_from_rbsymbol(new_stype_symbol);

  CheckNMatrixType(self);

  // Convert the sparse-basis (default) value into a C value of the new dtype.
  void* init_ptr = NM_ALLOCA_N(char, DTYPE_SIZES[new_dtype]);
  rubyval_to_cval(init, new_dtype, init_ptr);

  VALUE result = nm_cast_with_types(self, new_stype, new_dtype, init_ptr);

  NM_CONSERVATIVE(nm_unregister_value(&self));
  NM_CONSERVATIVE(nm_unregister_value(&init));
  return result;
}

/*
 * Copy constructor for transposing.
 */
static VALUE nm_init_transposed(VALUE self) {
  NM_CONSERVATIVE(nm_register_value(&self));

  // Stype-indexed dispatch table of transposing copy functions.
  static STORAGE* (*storage_copy_transposed[nm::NUM_STYPES])(const STORAGE* rhs_base) = {
    nm_dense_storage_copy_transposed,
    nm_list_storage_copy_transposed,
    nm_yale_storage_copy_transposed
  };

  NMATRIX* xposed = nm_create( NM_STYPE(self),
                               storage_copy_transposed[NM_STYPE(self)]( NM_STORAGE(self) ) );
  nm_register_nmatrix(xposed);

  VALUE result = Data_Wrap_Struct(CLASS_OF(self), nm_mark, nm_delete, xposed);

  nm_unregister_nmatrix(xposed);
  NM_CONSERVATIVE(nm_unregister_value(&self));
  return result;
}

/*
 * Copy constructor for no change of dtype or stype (used for #initialize_copy hook).
*/ static VALUE nm_init_copy(VALUE copy, VALUE original) { NM_CONSERVATIVE(nm_register_value(©)); NM_CONSERVATIVE(nm_register_value(&original)); NMATRIX *lhs, *rhs; CheckNMatrixType(original); if (copy == original) { NM_CONSERVATIVE(nm_unregister_value(©)); NM_CONSERVATIVE(nm_unregister_value(&original)); return copy; } UnwrapNMatrix( original, rhs ); UnwrapNMatrix( copy, lhs ); lhs->stype = rhs->stype; // Copy the storage CAST_TABLE(ttable); lhs->storage = ttable[lhs->stype][rhs->stype](rhs->storage, rhs->storage->dtype, NULL); NM_CONSERVATIVE(nm_unregister_value(©)); NM_CONSERVATIVE(nm_unregister_value(&original)); return copy; } /* * Get major, minor, and release components of NMatrix::VERSION. Store in function parameters. Doesn't get * the "pre" field currently (beta1/rc1/etc). */ static void get_version_info(uint16_t& major, uint16_t& minor, uint16_t& release) { // Get VERSION and split it on periods. Result is an Array. VALUE cVersion = rb_const_get(cNMatrix, rb_intern("VERSION")); // Convert each to an integer major = FIX2INT(rb_const_get(cVersion, rb_intern("MAJOR"))); minor = FIX2INT(rb_const_get(cVersion, rb_intern("MINOR"))); release = FIX2INT(rb_const_get(cVersion, rb_intern("TINY"))); } /* * Interpret the NMatrix::write symmetry argument (which should be nil or a symbol). Return a symm_t (enum). 
*/ static nm::symm_t interpret_symm(VALUE symm) { if (symm == Qnil) return nm::NONSYMM; ID rb_symm = rb_intern("symmetric"), rb_skew = rb_intern("skew"), rb_herm = rb_intern("hermitian"); // nm_rb_upper, nm_rb_lower already set ID symm_id = rb_to_id(symm); if (symm_id == rb_symm) return nm::SYMM; else if (symm_id == rb_skew) return nm::SKEW; else if (symm_id == rb_herm) return nm::HERM; else if (symm_id == nm_rb_upper) return nm::UPPER; else if (symm_id == nm_rb_lower) return nm::LOWER; else rb_raise(rb_eArgError, "unrecognized symmetry argument"); return nm::NONSYMM; } void read_padded_shape(std::ifstream& f, size_t dim, size_t* shape) { size_t bytes_read = 0; // Read shape for (size_t i = 0; i < dim; ++i) { size_t s; f.read(reinterpret_cast(&s), sizeof(size_t)); shape[i] = s; bytes_read += sizeof(size_t); } // Ignore padding f.ignore(bytes_read % 8); } void write_padded_shape(std::ofstream& f, size_t dim, size_t* shape) { size_t bytes_written = 0; // Write shape for (size_t i = 0; i < dim; ++i) { size_t s = shape[i]; f.write(reinterpret_cast(&s), sizeof(size_t)); bytes_written += sizeof(size_t); } // Pad with zeros size_t zero = 0; while (bytes_written % 8) { f.write(reinterpret_cast(&zero), sizeof(size_t)); bytes_written += sizeof(IType); } } void read_padded_yale_elements(std::ifstream& f, YALE_STORAGE* storage, size_t length, nm::symm_t symm, nm::dtype_t dtype) { NAMED_DTYPE_TEMPLATE_TABLE_NO_ROBJ(ttable, nm::read_padded_yale_elements, void, std::ifstream&, YALE_STORAGE*, size_t, nm::symm_t) ttable[dtype](f, storage, length, symm); } void write_padded_yale_elements(std::ofstream& f, YALE_STORAGE* storage, size_t length, nm::symm_t symm, nm::dtype_t dtype) { NAMED_DTYPE_TEMPLATE_TABLE_NO_ROBJ(ttable, nm::write_padded_yale_elements, void, std::ofstream& f, YALE_STORAGE*, size_t, nm::symm_t) ttable[dtype](f, storage, length, symm); } void read_padded_dense_elements(std::ifstream& f, DENSE_STORAGE* storage, nm::symm_t symm, nm::dtype_t dtype) { 
NAMED_DTYPE_TEMPLATE_TABLE_NO_ROBJ(ttable, nm::read_padded_dense_elements, void, std::ifstream&, DENSE_STORAGE*, nm::symm_t) ttable[dtype](f, storage, symm); } void write_padded_dense_elements(std::ofstream& f, DENSE_STORAGE* storage, nm::symm_t symm, nm::dtype_t dtype) { NAMED_DTYPE_TEMPLATE_TABLE_NO_ROBJ(ttable, nm::write_padded_dense_elements, void, std::ofstream& f, DENSE_STORAGE*, nm::symm_t) ttable[dtype](f, storage, symm); } /* * Helper function to get exceptions in the module Errno (e.g., ENOENT). Example: * * rb_raise(rb_get_errno_exc("ENOENT"), RSTRING_PTR(filename)); */ static VALUE rb_get_errno_exc(const char* which) { return rb_const_get(rb_const_get(rb_cObject, rb_intern("Errno")), rb_intern(which)); } /* * Binary file writer for NMatrix standard format. file should be a path, which we aren't going to * check very carefully (in other words, this function should generally be called from a Ruby * helper method). Function also takes a symmetry argument, which allows us to specify that we only want to * save the upper triangular portion of the matrix (or if the matrix is a lower triangular matrix, only * the lower triangular portion). nil means regular storage. */ static VALUE nm_write(int argc, VALUE* argv, VALUE self) { using std::ofstream; if (argc < 1 || argc > 2) { rb_raise(rb_eArgError, "Expected one or two arguments"); } NM_CONSERVATIVE(nm_register_values(argv, argc)); NM_CONSERVATIVE(nm_register_value(&self)); VALUE file = argv[0], symm = argc == 1 ? Qnil : argv[1]; NMATRIX* nmatrix; UnwrapNMatrix( self, nmatrix ); nm::symm_t symm_ = interpret_symm(symm); if (nmatrix->storage->dtype == nm::RUBYOBJ) { NM_CONSERVATIVE(nm_unregister_values(argv, argc)); NM_CONSERVATIVE(nm_unregister_value(&self)); rb_raise(rb_eNotImpError, "Ruby Object writing is not implemented yet"); } // Get the dtype, stype, itype, and symm and ensure they're the correct number of bytes. 
uint8_t st = static_cast(nmatrix->stype), dt = static_cast(nmatrix->storage->dtype), sm = static_cast(symm_); uint16_t dim = nmatrix->storage->dim; //FIXME: Cast the matrix to the smallest possible index type. Write that in the place of IType. // Check arguments before starting to write. if (nmatrix->stype == nm::LIST_STORE) { NM_CONSERVATIVE(nm_unregister_values(argv, argc)); NM_CONSERVATIVE(nm_unregister_value(&self)); rb_raise(nm_eStorageTypeError, "cannot save list matrix; cast to yale or dense first"); } if (symm_ != nm::NONSYMM) { NM_CONSERVATIVE(nm_unregister_values(argv, argc)); NM_CONSERVATIVE(nm_unregister_value(&self)); if (dim != 2) rb_raise(rb_eArgError, "symmetry/triangularity not defined for a non-2D matrix"); if (nmatrix->storage->shape[0] != nmatrix->storage->shape[1]) rb_raise(rb_eArgError, "symmetry/triangularity not defined for a non-square matrix"); if (symm_ == nm::HERM && dt != static_cast(nm::COMPLEX64) && dt != static_cast(nm::COMPLEX128) && dt != static_cast(nm::RUBYOBJ)) rb_raise(rb_eArgError, "cannot save a non-complex matrix as hermitian"); } ofstream f(RSTRING_PTR(file), std::ios::out | std::ios::binary); // Get the NMatrix version information. 
uint16_t major, minor, release, null16 = 0; get_version_info(major, minor, release); // WRITE FIRST 64-BIT BLOCK f.write(reinterpret_cast(&major), sizeof(uint16_t)); f.write(reinterpret_cast(&minor), sizeof(uint16_t)); f.write(reinterpret_cast(&release), sizeof(uint16_t)); f.write(reinterpret_cast(&null16), sizeof(uint16_t)); uint8_t ZERO = 0; // WRITE SECOND 64-BIT BLOCK f.write(reinterpret_cast(&dt), sizeof(uint8_t)); f.write(reinterpret_cast(&st), sizeof(uint8_t)); f.write(reinterpret_cast(&ZERO),sizeof(uint8_t)); f.write(reinterpret_cast(&sm), sizeof(uint8_t)); f.write(reinterpret_cast(&null16), sizeof(uint16_t)); f.write(reinterpret_cast(&dim), sizeof(uint16_t)); // Write shape (in 64-bit blocks) write_padded_shape(f, nmatrix->storage->dim, nmatrix->storage->shape); if (nmatrix->stype == nm::DENSE_STORE) { write_padded_dense_elements(f, reinterpret_cast(nmatrix->storage), symm_, nmatrix->storage->dtype); } else if (nmatrix->stype == nm::YALE_STORE) { YALE_STORAGE* s = reinterpret_cast(nmatrix->storage); uint32_t ndnz = s->ndnz, length = nm_yale_storage_get_size(s); f.write(reinterpret_cast(&ndnz), sizeof(uint32_t)); f.write(reinterpret_cast(&length), sizeof(uint32_t)); write_padded_yale_elements(f, s, length, symm_, s->dtype); } f.close(); NM_CONSERVATIVE(nm_unregister_values(argv, argc)); NM_CONSERVATIVE(nm_unregister_value(&self)); return Qtrue; } /* * Binary file reader for NMatrix standard format. file should be a path, which we aren't going to * check very carefully (in other words, this function should generally be called from a Ruby * helper method). * * Note that currently, this function will by default refuse to read files that are newer than * your version of NMatrix. To force an override, set the second argument to anything other than nil. * * Returns an NMatrix Ruby object. 
*/ static VALUE nm_read(int argc, VALUE* argv, VALUE self) { using std::ifstream; NM_CONSERVATIVE(nm_register_values(argv, argc)); NM_CONSERVATIVE(nm_register_value(&self)); VALUE file, force_; // Read the arguments rb_scan_args(argc, argv, "11", &file, &force_); bool force = (force_ != Qnil && force_ != Qfalse); if (!RB_FILE_EXISTS(file)) { // FIXME: Errno::ENOENT NM_CONSERVATIVE(nm_unregister_values(argv, argc)); NM_CONSERVATIVE(nm_unregister_value(&self)); rb_raise(rb_get_errno_exc("ENOENT"), "%s", RSTRING_PTR(file)); } // Open a file stream ifstream f(RSTRING_PTR(file), std::ios::in | std::ios::binary); uint16_t major, minor, release; get_version_info(major, minor, release); // compare to NMatrix version uint16_t fmajor, fminor, frelease, null16; // READ FIRST 64-BIT BLOCK f.read(reinterpret_cast(&fmajor), sizeof(uint16_t)); f.read(reinterpret_cast(&fminor), sizeof(uint16_t)); f.read(reinterpret_cast(&frelease), sizeof(uint16_t)); f.read(reinterpret_cast(&null16), sizeof(uint16_t)); int ver = major * 10000 + minor * 100 + release, fver = fmajor * 10000 + fminor * 100 + release; if (fver > ver && force == false) { NM_CONSERVATIVE(nm_unregister_values(argv, argc)); NM_CONSERVATIVE(nm_unregister_value(&self)); rb_raise(rb_eIOError, "File was created in newer version of NMatrix than current (%u.%u.%u)", fmajor, fminor, frelease); } if (null16 != 0) rb_warn("nm_read: Expected zero padding was not zero (0)\n"); uint8_t dt, st, it, sm; uint16_t dim; // READ SECOND 64-BIT BLOCK f.read(reinterpret_cast(&dt), sizeof(uint8_t)); f.read(reinterpret_cast(&st), sizeof(uint8_t)); f.read(reinterpret_cast(&it), sizeof(uint8_t)); // FIXME: should tell how few bytes indices are stored as f.read(reinterpret_cast(&sm), sizeof(uint8_t)); f.read(reinterpret_cast(&null16), sizeof(uint16_t)); f.read(reinterpret_cast(&dim), sizeof(uint16_t)); if (null16 != 0) rb_warn("nm_read: Expected zero padding was not zero (1)"); nm::stype_t stype = static_cast(st); nm::dtype_t dtype = 
static_cast(dt); nm::symm_t symm = static_cast(sm); //nm::itype_t itype = static_cast(it); // READ NEXT FEW 64-BIT BLOCKS size_t* shape = NM_ALLOC_N(size_t, dim); read_padded_shape(f, dim, shape); STORAGE* s; if (stype == nm::DENSE_STORE) { s = nm_dense_storage_create(dtype, shape, dim, NULL, 0); nm_register_storage(stype, s); read_padded_dense_elements(f, reinterpret_cast(s), symm, dtype); } else if (stype == nm::YALE_STORE) { uint32_t ndnz, length; // READ YALE-SPECIFIC 64-BIT BLOCK f.read(reinterpret_cast(&ndnz), sizeof(uint32_t)); f.read(reinterpret_cast(&length), sizeof(uint32_t)); s = nm_yale_storage_create(dtype, shape, dim, length); // set length as init capacity nm_register_storage(stype, s); read_padded_yale_elements(f, reinterpret_cast(s), length, symm, dtype); } else { NM_CONSERVATIVE(nm_unregister_values(argv, argc)); NM_CONSERVATIVE(nm_unregister_value(&self)); rb_raise(nm_eStorageTypeError, "please convert to yale or dense before saving"); } NMATRIX* nm = nm_create(stype, s); // Return the appropriate matrix object (Ruby VALUE) // FIXME: This should probably return CLASS_OF(self) instead of cNMatrix, but I don't know how that works for // FIXME: class methods. nm_register_nmatrix(nm); VALUE to_return = Data_Wrap_Struct(cNMatrix, nm_mark, nm_delete, nm); nm_unregister_nmatrix(nm); NM_CONSERVATIVE(nm_unregister_values(argv, argc)); NM_CONSERVATIVE(nm_unregister_value(&self)); nm_unregister_storage(stype, s); switch(stype) { case nm::DENSE_STORE: case nm::YALE_STORE: return to_return; default: // this case never occurs (due to earlier rb_raise) return Qnil; } } /* * Create a new NMatrix helper for handling internal ia, ja, and a arguments. * * This constructor is only called by Ruby code, so we can skip most of the * checks. 
*/
static VALUE nm_init_yale_from_old_yale(VALUE shape, VALUE dtype, VALUE ia, VALUE ja, VALUE a, VALUE from_dtype, VALUE nm) {
  size_t dim = 2;

  // Decode the target shape and the two dtypes involved in the conversion.
  size_t*     shape_arr = interpret_shape(shape, &dim);
  nm::dtype_t to_dtype  = nm_dtype_from_rbsymbol(dtype);
  nm::dtype_t old_dtype = nm_dtype_from_rbsymbol(from_dtype);

  // Raw byte buffers holding the old-Yale ia, ja, and a arrays.
  char* ia_bytes = RSTRING_PTR(ia);
  char* ja_bytes = RSTRING_PTR(ja);
  char* a_bytes  = RSTRING_PTR(a);

  NMATRIX* nmatrix;
  UnwrapNMatrix( nm, nmatrix );

  nmatrix->stype   = nm::YALE_STORE;
  nmatrix->storage = (STORAGE*)nm_yale_storage_create_from_old_yale(to_dtype, shape_arr, ia_bytes, ja_bytes, a_bytes, old_dtype);

  return nm;
}

/*
 * Check to determine whether matrix is a reference to another matrix.
 */
static VALUE nm_is_ref(VALUE self) {
  // A matrix that is not a slice is its own storage source.
  return (NM_SRC(self) == NM_STORAGE(self)) ? Qfalse : Qtrue;
}

/*
 * call-seq:
 *     slice -> ...
 *
 * Access the contents of an NMatrix at given coordinates, using copying.
 *
 *     n.slice(3,3)  # => 5.0
 *     n.slice(0..1,0..1) #=> matrix [2,2]
 *
 */
static VALUE nm_mget(int argc, VALUE* argv, VALUE self) {
  // Per-stype copying accessors, indexed by stype.
  static void* (*ttable[nm::NUM_STYPES])(const STORAGE*, SLICE*) = {
    nm_dense_storage_get,
    nm_list_storage_get,
    nm_yale_storage_get
  };
  return nm_xslice(argc, argv, ttable[NM_STYPE(self)], nm_delete, self);
}

/*
 * call-seq:
 *     matrix[indices] -> ...
 *
 * Access the contents of an NMatrix at given coordinates by reference.
* * n[3,3] # => 5.0 * n[0..1,0..1] #=> matrix [2,2] * */ static VALUE nm_mref(int argc, VALUE* argv, VALUE self) { static void* (*ttable[nm::NUM_STYPES])(const STORAGE*, SLICE*) = { nm_dense_storage_ref, nm_list_storage_ref, nm_yale_storage_ref }; nm::stype_t stype = NM_STYPE(self); return nm_xslice(argc, argv, ttable[stype], nm_delete_ref, self); } /* * Modify the contents of an NMatrix in the given cell * * n[3,3] = 5.0 * * Also returns the new contents, so you can chain: * * n[3,3] = n[2,3] = 5.0 */ static VALUE nm_mset(int argc, VALUE* argv, VALUE self) { size_t dim = NM_DIM(self); // last arg is the value VALUE to_return = Qnil; if ((size_t)(argc) > NM_DIM(self)+1) { rb_raise(rb_eArgError, "wrong number of arguments (%d for %lu)", argc, effective_dim(NM_STORAGE(self))+1); } else { NM_CONSERVATIVE(nm_register_value(&self)); NM_CONSERVATIVE(nm_register_values(argv, argc)); SLICE slice_s; SLICE* slice = &slice_s; slice->coords = NM_ALLOCA_N(size_t, dim); slice->lengths = NM_ALLOCA_N(size_t, dim); init_slice_no_alloc(slice, dim, argc-1, argv, NM_STORAGE(self)->shape); static void (*ttable[nm::NUM_STYPES])(VALUE, SLICE*, VALUE) = { nm_dense_storage_set, nm_list_storage_set, nm_yale_storage_set }; ttable[NM_STYPE(self)](self, slice, argv[argc-1]); to_return = argv[argc-1]; NM_CONSERVATIVE(nm_unregister_value(&self)); NM_CONSERVATIVE(nm_unregister_values(argv, argc)); } return to_return; } /* * Matrix multiply (dot product): against another matrix or a vector. * * For elementwise, use * instead. * * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur. 
*/
static VALUE nm_multiply(VALUE left_v, VALUE right_v) {
  NM_CONSERVATIVE(nm_register_value(&left_v));
  NM_CONSERVATIVE(nm_register_value(&right_v));

  NMATRIX *left, *right;

  UnwrapNMatrix( left_v, left );

  if (NM_RUBYVAL_IS_NUMERIC(right_v)) {
    // Matrix-scalar multiplication (currently raises NotImplementedError downstream).
    NM_CONSERVATIVE(nm_unregister_value(&left_v));
    NM_CONSERVATIVE(nm_unregister_value(&right_v));
    return matrix_multiply_scalar(left, right_v);
  }
  else if (RB_TYPE_P(right_v, T_ARRAY)) {
    // Plain Ruby Arrays are not accepted; the caller must wrap them first.
    NM_CONSERVATIVE(nm_unregister_value(&left_v));
    NM_CONSERVATIVE(nm_unregister_value(&right_v));
    rb_raise(rb_eNotImpError, "please convert array to nx1 or 1xn NMatrix first");
  }
  else { // both are matrices (probably)
    CheckNMatrixType(right_v);
    UnwrapNMatrix( right_v, right );

    // work like vector dot product for 1dim
    if (left->storage->dim == 1 && right->storage->dim == 1) {
      if (left->storage->shape[0] != right->storage->shape[0]) {
        NM_CONSERVATIVE(nm_unregister_value(&left_v));
        NM_CONSERVATIVE(nm_unregister_value(&right_v));
        rb_raise(rb_eArgError, "The left- and right-hand sides of the operation must have the same dimensionality.");
      } else {
        // Dot product = element-wise multiply, then sum the result in Ruby.
        VALUE result    = elementwise_op(nm::EW_MUL, left_v, right_v);
        VALUE to_return = rb_funcall(result, rb_intern("sum"),0);
        NM_CONSERVATIVE(nm_unregister_value(&left_v));
        NM_CONSERVATIVE(nm_unregister_value(&right_v));
        return to_return;
      }
    }

    // 2-D matrix product: inner dimensions must agree.
    if (left->storage->shape[1] != right->storage->shape[0]) {
      NM_CONSERVATIVE(nm_unregister_value(&left_v));
      NM_CONSERVATIVE(nm_unregister_value(&right_v));
      rb_raise(rb_eArgError, "incompatible dimensions");
    }

    if (left->stype != right->stype) {
      NM_CONSERVATIVE(nm_unregister_value(&left_v));
      NM_CONSERVATIVE(nm_unregister_value(&right_v));
      rb_raise(rb_eNotImpError, "matrices must have same stype");
    }

    NM_CONSERVATIVE(nm_unregister_value(&left_v));
    NM_CONSERVATIVE(nm_unregister_value(&right_v));
    return matrix_multiply(left, right);
  }

  // Not reached: every branch above returns or raises.
  NM_CONSERVATIVE(nm_unregister_value(&left_v));
  NM_CONSERVATIVE(nm_unregister_value(&right_v));

  return Qnil;
}

/*
 * call-seq:
 *     dim -> Integer
 *
 * Get the
number of dimensions of a matrix. * * In other words, if you set your matrix to be 3x4, the dim is 2. If the * matrix was initialized as 3x4x3, the dim is 3. * * Use #effective_dim to get the dimension of an NMatrix which acts as a vector (e.g., a column or row). */ static VALUE nm_dim(VALUE self) { return INT2FIX(NM_STORAGE(self)->dim); } /* * call-seq: * shape -> Array * * Get the shape (dimensions) of a matrix. */ static VALUE nm_shape(VALUE self) { NM_CONSERVATIVE(nm_register_value(&self)); STORAGE* s = NM_STORAGE(self); // Copy elements into a VALUE array and then use those to create a Ruby array with rb_ary_new4. VALUE* shape = NM_ALLOCA_N(VALUE, s->dim); nm_register_values(shape, s->dim); for (size_t index = 0; index < s->dim; ++index) shape[index] = INT2FIX(s->shape[index]); nm_unregister_values(shape, s->dim); NM_CONSERVATIVE(nm_unregister_value(&self)); return rb_ary_new4(s->dim, shape); } /* * call-seq: * offset -> Array * * Get the offset (slice position) of a matrix. Typically all zeros, unless you have a reference slice. */ static VALUE nm_offset(VALUE self) { NM_CONSERVATIVE(nm_register_value(&self)); STORAGE* s = NM_STORAGE(self); // Copy elements into a VALUE array and then use those to create a Ruby array with rb_ary_new4. VALUE* offset = NM_ALLOCA_N(VALUE, s->dim); nm_register_values(offset, s->dim); for (size_t index = 0; index < s->dim; ++index) offset[index] = INT2FIX(s->offset[index]); nm_unregister_values(offset, s->dim); NM_CONSERVATIVE(nm_unregister_value(&self)); return rb_ary_new4(s->dim, offset); } /* * call-seq: * supershape -> Array * * Get the shape of a slice's parent. 
*/ static VALUE nm_supershape(VALUE self) { STORAGE* s = NM_STORAGE(self); if (s->src == s) { return nm_shape(self); // easy case (not a slice) } else s = s->src; NM_CONSERVATIVE(nm_register_value(&self)); VALUE* shape = NM_ALLOCA_N(VALUE, s->dim); nm_register_values(shape, s->dim); for (size_t index = 0; index < s->dim; ++index) shape[index] = INT2FIX(s->shape[index]); nm_unregister_values(shape, s->dim); NM_CONSERVATIVE(nm_unregister_value(&self)); return rb_ary_new4(s->dim, shape); } /* * call-seq: * stype -> Symbol * * Get the storage type (stype) of a matrix, e.g., :yale, :dense, or :list. */ static VALUE nm_stype(VALUE self) { NM_CONSERVATIVE(nm_register_value(&self)); VALUE stype = ID2SYM(rb_intern(STYPE_NAMES[NM_STYPE(self)])); NM_CONSERVATIVE(nm_unregister_value(&self)); return stype; } /* * call-seq: * symmetric? -> Boolean * * Is this matrix symmetric? */ static VALUE nm_symmetric(VALUE self) { return is_symmetric(self, false); } /* * Gets the dimension of a matrix which might be a vector (have one or more shape components of size 1). */ static size_t effective_dim(STORAGE* s) { size_t d = 0; for (size_t i = 0; i < s->dim; ++i) { if (s->shape[i] != 1) d++; } return d; } /* * call-seq: * effective_dim -> Fixnum * * Returns the number of dimensions that don't have length 1. Guaranteed to be less than or equal to #dim. */ static VALUE nm_effective_dim(VALUE self) { return INT2FIX(effective_dim(NM_STORAGE(self))); } /* * Get a slice of an NMatrix. 
*/ static VALUE nm_xslice(int argc, VALUE* argv, void* (*slice_func)(const STORAGE*, SLICE*), void (*delete_func)(NMATRIX*), VALUE self) { VALUE result = Qnil; STORAGE* s = NM_STORAGE(self); if (NM_DIM(self) < (size_t)(argc)) { rb_raise(rb_eArgError, "wrong number of arguments (%d for %lu)", argc, effective_dim(s)); } else { NM_CONSERVATIVE(nm_register_values(argv, argc)); NM_CONSERVATIVE(nm_register_value(&self)); nm_register_value(&result); SLICE slice_s; SLICE* slice = &slice_s; size_t dim = NM_DIM(self); slice->coords = NM_ALLOCA_N(size_t, dim); slice->lengths = NM_ALLOCA_N(size_t, dim); init_slice_no_alloc(slice, dim, argc, argv, s->shape); if (slice->single) { static void* (*ttable[nm::NUM_STYPES])(const STORAGE*, SLICE*) = { nm_dense_storage_ref, nm_list_storage_ref, nm_yale_storage_ref }; if (NM_DTYPE(self) == nm::RUBYOBJ) result = *reinterpret_cast( ttable[NM_STYPE(self)](s, slice) ); else result = nm::rubyobj_from_cval( ttable[NM_STYPE(self)](s, slice), NM_DTYPE(self) ).rval; } else { NMATRIX* mat = NM_ALLOC(NMATRIX); mat->stype = NM_STYPE(self); mat->storage = (STORAGE*)((*slice_func)( s, slice )); nm_register_nmatrix(mat); result = Data_Wrap_Struct(CLASS_OF(self), nm_mark, delete_func, mat); nm_unregister_nmatrix(mat); } } nm_unregister_value(&result); NM_CONSERVATIVE(nm_unregister_values(argv, argc)); NM_CONSERVATIVE(nm_unregister_value(&self)); return result; } ////////////////////// // Helper Functions // ////////////////////// static VALUE unary_op(nm::unaryop_t op, VALUE self) { NM_CONSERVATIVE(nm_register_value(&self)); NMATRIX* left; UnwrapNMatrix(self, left); std::string sym; switch(left->stype) { case nm::DENSE_STORE: sym = "__dense_unary_" + nm::UNARYOPS[op] + "__"; break; case nm::YALE_STORE: sym = "__yale_unary_" + nm::UNARYOPS[op] + "__"; break; case nm::LIST_STORE: sym = "__list_unary_" + nm::UNARYOPS[op] + "__"; break; } NM_CONSERVATIVE(nm_unregister_value(&self)); return rb_funcall(self, rb_intern(sym.c_str()), 0); } static void 
check_dims_and_shape(VALUE left_val, VALUE right_val) {
    // Check that the left- and right-hand sides have the same dimensionality.
    if (NM_DIM(left_val) != NM_DIM(right_val)) {
      rb_raise(rb_eArgError, "The left- and right-hand sides of the operation must have the same dimensionality.");
    }

    // Check that the left- and right-hand sides have the same shape.
    if (memcmp(&NM_SHAPE(left_val, 0), &NM_SHAPE(right_val, 0), sizeof(size_t) * NM_DIM(left_val)) != 0) {
      rb_raise(rb_eArgError, "The left- and right-hand sides of the operation must have the same shape.");
    }
}

/*
 * Dispatch a commutative element-wise binary operation to the appropriate
 * stype-specific Ruby helper (scalar variant when the right-hand side is not
 * an NMatrix, elementwise variant otherwise).
 */
static VALUE elementwise_op(nm::ewop_t op, VALUE left_val, VALUE right_val) {
  NM_CONSERVATIVE(nm_register_value(&left_val));
  NM_CONSERVATIVE(nm_register_value(&right_val));

  NMATRIX* left;
  NMATRIX* result;

  CheckNMatrixType(left_val);
  UnwrapNMatrix(left_val, left);

  if (!IsNMatrixType(right_val)) {
    // This is a matrix-scalar element-wise operation.
    std::string sym;
    switch(left->stype) {
    case nm::DENSE_STORE:
      sym = "__dense_scalar_" + nm::EWOP_NAMES[op] + "__";
      break;
    case nm::YALE_STORE:
      sym = "__yale_scalar_" + nm::EWOP_NAMES[op] + "__";
      break;
    case nm::LIST_STORE:
      sym = "__list_scalar_" + nm::EWOP_NAMES[op] + "__";
      break;
    default:
      NM_CONSERVATIVE(nm_unregister_value(&left_val));
      NM_CONSERVATIVE(nm_unregister_value(&right_val));
      rb_raise(rb_eNotImpError, "unknown storage type requested scalar element-wise operation");
    }
    // rb_intern returns an ID, so store it as one.
    ID symv = rb_intern(sym.c_str());
    NM_CONSERVATIVE(nm_unregister_value(&left_val));
    NM_CONSERVATIVE(nm_unregister_value(&right_val));
    return rb_funcall(left_val, symv, 1, right_val);

  } else {
    check_dims_and_shape(left_val, right_val);

    NMATRIX* right;
    UnwrapNMatrix(right_val, right);

    if (left->stype == right->stype) {
      std::string sym;

      switch(left->stype) {
      case nm::DENSE_STORE:
        sym = "__dense_elementwise_" + nm::EWOP_NAMES[op] + "__";
        break;
      case nm::YALE_STORE:
        sym = "__yale_elementwise_" + nm::EWOP_NAMES[op] + "__";
        break;
      case nm::LIST_STORE:
        sym = "__list_elementwise_" + nm::EWOP_NAMES[op] + "__";
        break;
      default:
        NM_CONSERVATIVE(nm_unregister_value(&left_val));
        NM_CONSERVATIVE(nm_unregister_value(&right_val));
        rb_raise(rb_eNotImpError, "unknown storage type requested element-wise operation");
      }
      ID symv = rb_intern(sym.c_str());
      NM_CONSERVATIVE(nm_unregister_value(&left_val));
      NM_CONSERVATIVE(nm_unregister_value(&right_val));
      return rb_funcall(left_val, symv, 1, right_val);

    } else {
      NM_CONSERVATIVE(nm_unregister_value(&left_val));
      NM_CONSERVATIVE(nm_unregister_value(&right_val));
      rb_raise(rb_eArgError, "Element-wise operations are not currently supported between matrices with differing stypes.");
    }
  }

  // Not reached: every branch above returns or raises. Kept so all control
  // paths end in a return statement.
  NM_CONSERVATIVE(nm_unregister_value(&left_val));
  NM_CONSERVATIVE(nm_unregister_value(&right_val));
  return Data_Wrap_Struct(CLASS_OF(left_val), nm_mark, nm_delete, result);
}

/*
 * Dispatch a non-commutative element-wise binary operation (e.g., -, /) to the
 * appropriate stype-specific Ruby helper. flip indicates whether the operands
 * should be applied in reversed order.
 */
static VALUE noncom_elementwise_op(nm::noncom_ewop_t op, VALUE self, VALUE other, VALUE flip) {
  NM_CONSERVATIVE(nm_register_value(&self));
  NM_CONSERVATIVE(nm_register_value(&other));

  NMATRIX* self_nm;
  NMATRIX* result;

  CheckNMatrixType(self);
  UnwrapNMatrix(self, self_nm);

  if (!IsNMatrixType(other)) {
    // This is a matrix-scalar element-wise operation.
    std::string sym;
    switch(self_nm->stype) {
    case nm::DENSE_STORE:
      sym = "__dense_scalar_" + nm::NONCOM_EWOP_NAMES[op] + "__";
      break;
    case nm::YALE_STORE:
      sym = "__yale_scalar_" + nm::NONCOM_EWOP_NAMES[op] + "__";
      break;
    case nm::LIST_STORE:
      sym = "__list_scalar_" + nm::NONCOM_EWOP_NAMES[op] + "__";
      break;
    default:
      NM_CONSERVATIVE(nm_unregister_value(&self));
      NM_CONSERVATIVE(nm_unregister_value(&other));
      rb_raise(rb_eNotImpError, "unknown storage type requested scalar element-wise operation");
    }
    NM_CONSERVATIVE(nm_unregister_value(&self));
    NM_CONSERVATIVE(nm_unregister_value(&other));
    return rb_funcall(self, rb_intern(sym.c_str()), 2, other, flip);

  } else {
    check_dims_and_shape(self, other);

    NMATRIX* other_nm;
    UnwrapNMatrix(other, other_nm);

    if (self_nm->stype == other_nm->stype) {
      std::string sym;
      switch(self_nm->stype) {
      case nm::DENSE_STORE:
        sym = "__dense_elementwise_" + nm::NONCOM_EWOP_NAMES[op] + "__";
        break;
      case nm::YALE_STORE:
        sym = "__yale_elementwise_" + nm::NONCOM_EWOP_NAMES[op] + "__";
        break;
      case nm::LIST_STORE:
        sym = "__list_elementwise_" + nm::NONCOM_EWOP_NAMES[op] + "__";
        break;
      default:
        NM_CONSERVATIVE(nm_unregister_value(&self));
        NM_CONSERVATIVE(nm_unregister_value(&other));
        rb_raise(rb_eNotImpError, "unknown storage type requested element-wise operation");
      }
      NM_CONSERVATIVE(nm_unregister_value(&self));
      NM_CONSERVATIVE(nm_unregister_value(&other));
      return rb_funcall(self, rb_intern(sym.c_str()), 2, other, flip);

    } else {
      // Wrap these in NM_CONSERVATIVE to balance the NM_CONSERVATIVE
      // registrations at the top of the function.
      NM_CONSERVATIVE(nm_unregister_value(&self));
      NM_CONSERVATIVE(nm_unregister_value(&other));
      rb_raise(rb_eArgError, "Element-wise operations are not currently supported between matrices with differing stypes.");
    }
  }

  // Not reached: every branch above returns or raises. Kept so all control
  // paths end in a return statement.
  NM_CONSERVATIVE(nm_unregister_value(&self));
  NM_CONSERVATIVE(nm_unregister_value(&other));
  return Data_Wrap_Struct(CLASS_OF(self), nm_mark, nm_delete, result);
}

/*
 * Check to determine whether matrix is a reference to another matrix.
*/ bool is_ref(const NMATRIX* matrix) { return matrix->storage->src != matrix->storage; } /* * Helper function for nm_symmetric and nm_hermitian. */ static VALUE is_symmetric(VALUE self, bool hermitian) { NM_CONSERVATIVE(nm_register_value(&self)); NMATRIX* m; UnwrapNMatrix(self, m); bool is_symmetric = false; if (m->storage->shape[0] == m->storage->shape[1] and m->storage->dim == 2) { if (NM_STYPE(self) == nm::DENSE_STORE) { if (hermitian) { is_symmetric = nm_dense_storage_is_hermitian((DENSE_STORAGE*)(m->storage), m->storage->shape[0]); } else { is_symmetric = nm_dense_storage_is_symmetric((DENSE_STORAGE*)(m->storage), m->storage->shape[0]); } } else { // TODO: Implement, at the very least, yale_is_symmetric. Model it after yale/transp.template.c. NM_CONSERVATIVE(nm_unregister_value(&self)); rb_raise(rb_eNotImpError, "symmetric? and hermitian? only implemented for dense currently"); } } NM_CONSERVATIVE(nm_unregister_value(&self)); return is_symmetric ? Qtrue : Qfalse; } /////////////////////// // Utility Functions // /////////////////////// /* * Guess the dtype given a Ruby VALUE and return it as a symbol. * * Not to be confused with nm_dtype_guess, which returns an nm::dtype_t. (This calls that.) */ static VALUE nm_guess_dtype(VALUE self, VALUE v) { return ID2SYM(rb_intern(DTYPE_NAMES[nm_dtype_guess(v)])); } /* * Get the minimum allowable dtype for a Ruby VALUE and return it as a symbol. */ static VALUE nm_min_dtype(VALUE self, VALUE v) { return ID2SYM(rb_intern(DTYPE_NAMES[nm_dtype_min(v)])); } /* * Helper for nm_dtype_min(), handling integers. */ nm::dtype_t nm_dtype_min_fixnum(int64_t v) { if (v >= 0 && v <= UCHAR_MAX) return nm::BYTE; else { v = std::abs(v); if (v <= CHAR_MAX) return nm::INT8; else if (v <= SHRT_MAX) return nm::INT16; else if (v <= INT_MAX) return nm::INT32; else return nm::INT64; } } /* * Return the minimum dtype required to store a given value. * * This is kind of arbitrary. 
 * For Float, it always returns :float32 for example, since in some cases neither :float64
 * nor :float32 are sufficient.
 *
 * This function is used in upcasting for scalar math. We want to ensure that :int8 + 1 does not return an :int64, basically.
 *
 * FIXME: Eventually, this function should actually look at the value stored in Fixnums (for example), so that it knows
 * whether to return :int64 or :int32.
 */
nm::dtype_t nm_dtype_min(VALUE v) {
  if (RB_TYPE_P(v, T_FIXNUM))
    return nm_dtype_min_fixnum(FIX2LONG(v));
  else if (RB_TYPE_P(v, T_BIGNUM))
    return nm::INT64;
  else if (RB_TYPE_P(v, T_FLOAT))
    return nm::FLOAT32;
  else if (RB_TYPE_P(v, T_COMPLEX))
    return nm::COMPLEX64;
  else if (RB_TYPE_P(v, T_STRING))
    // Single-character strings fit in a byte; anything longer stays a Ruby object.
    return RSTRING_LEN(v) == 1 ? nm::BYTE : nm::RUBYOBJ;
  else if (RB_TYPE_P(v, T_TRUE) || RB_TYPE_P(v, T_FALSE) || RB_TYPE_P(v, T_NIL))
    return nm::RUBYOBJ;
  else
    return nm::RUBYOBJ;
}

/*
 * Guess the data type given a value.
 *
 * TODO: Probably needs some work for Bignum.
 */
nm::dtype_t nm_dtype_guess(VALUE v) {
  if (RB_TYPE_P(v, T_TRUE) || RB_TYPE_P(v, T_FALSE) || RB_TYPE_P(v, T_NIL))
    return nm::RUBYOBJ;
  else if (RB_TYPE_P(v, T_STRING))
    // Single-character strings fit in a byte; anything longer stays a Ruby object.
    return RSTRING_LEN(v) == 1 ? nm::BYTE : nm::RUBYOBJ;
  else if (RB_TYPE_P(v, T_FIXNUM))
    // Pick the integer dtype matching the platform's int width.
#if SIZEOF_INT == 8
    return nm::INT64;
#elif SIZEOF_INT == 4
    return nm::INT32;
#else
    return nm::INT16;
#endif
  else if (RB_TYPE_P(v, T_BIGNUM))
    return nm::INT64;
  // NOTE(review): the SIZEOF_FLOAT == 2 branch looks unreachable on real
  // platforms (C float is normally 4 bytes) — confirm intent before relying on it.
#if SIZEOF_FLOAT == 4
  else if (RB_TYPE_P(v, T_COMPLEX))
    return nm::COMPLEX128;
  else if (RB_TYPE_P(v, T_FLOAT))
    return nm::FLOAT64;
#elif SIZEOF_FLOAT == 2
  else if (RB_TYPE_P(v, T_COMPLEX))
    return nm::COMPLEX64;
  else if (RB_TYPE_P(v, T_FLOAT))
    return nm::FLOAT32;
#endif
  else if (RB_TYPE_P(v, T_ARRAY))
    /*
     * May be passed for dense -- for now, just look at the first element.
     *
     * TODO: Look at entire array for most specific type.
     */
    return nm_dtype_guess(RARRAY_AREF(v, 0));
  else {
    RB_P(v);
    rb_raise(rb_eArgError, "Unable to guess a data type from provided parameters; data type must be specified manually.");
  }
}

/*
 * Modify an existing SLICE object (with properly allocated memory),
 * so that it will contain the appropriate coordinate and length information
 * for accessing some part of a matrix.
 */
static void init_slice_no_alloc(SLICE* slice, size_t dim, int argc, VALUE* arg, size_t* shape) {
  NM_CONSERVATIVE(nm_register_values(arg, argc));
  VALUE beg, end;
  int excl;

  // Assume a single-element access until a range or :* argument is seen.
  slice->single = true;

  // r is the shape position; t is the slice position. They may differ when we're dealing with a
  // matrix where the effective dimension is less than the dimension (e.g., a vector).
  for (size_t r = 0, t = 0; r < dim; ++r) {
    VALUE v = t == (unsigned int)argc ? Qnil : arg[t];

    // if the current shape indicates a vector and fewer args were supplied than necessary, just use 0
    if (argc - t + r < dim && shape[r] == 1) {
      slice->coords[r]  = 0;
      slice->lengths[r] = 1;

    } else if (FIXNUM_P(v)) { // this used CLASS_OF before, which is inefficient for fixnum
      int v_ = FIX2INT(v);
      if (v_ < 0) // checking for negative indexes
        slice->coords[r]  = shape[r]+v_;
      else
        slice->coords[r]  = v_;
      slice->lengths[r] = 1;
      t++;

    } else if (SYMBOL_P(v) && rb_to_id(v) == nm_rb_mul) { // :* means the whole possible range
      slice->coords[r]  = 0;
      slice->lengths[r] = shape[r];
      slice->single     = false;
      t++;

    } else if (CLASS_OF(v) == rb_cRange) {
      rb_range_values(arg[t], &beg, &end, &excl);

      int begin_ = FIX2INT(beg);
      int end_   = FIX2INT(end);

      // Negative endpoints count backwards from the end of the dimension.
      slice->coords[r] = (begin_ < 0) ? shape[r] + begin_ : begin_;

      // Exclude last element for a...b range
      if (end_ < 0)
        slice->lengths[r] = shape[r] + end_ - slice->coords[r] + (excl ? 0 : 1);
      else
        slice->lengths[r] = end_ - slice->coords[r] + (excl ? 0 : 1);

      slice->single     = false;
      t++;

    } else {
      NM_CONSERVATIVE(nm_unregister_values(arg, argc));
      rb_raise(rb_eArgError, "expected Fixnum or Range for slice component instead of %s", rb_obj_classname(v));
    }

    // Bounds check: the slice must lie entirely within this dimension.
    if (slice->coords[r] > shape[r] || slice->coords[r] + slice->lengths[r] > shape[r]) {
      NM_CONSERVATIVE(nm_unregister_values(arg, argc));
      rb_raise(rb_eRangeError, "slice is larger than matrix in dimension %lu (slice component %lu)", r, t);
    }
  }

  NM_CONSERVATIVE(nm_unregister_values(arg, argc));
}

#ifdef BENCHMARK
/*
 * A simple function used when benchmarking NMatrix.
 */
static double get_time(void) {
  struct timeval t;
  struct timezone tzp;

  gettimeofday(&t, &tzp);

  return t.tv_sec + t.tv_usec*1e-6;
}
#endif

/*
 * The argv parameter will be either 1 or 2 elements. If 1, could be either
 * initial or dtype. If 2, is initial and dtype. This function returns the
 * dtype.
 */
static nm::dtype_t interpret_dtype(int argc, VALUE* argv, nm::stype_t stype) {
  int offset;

  // The dtype argument, if present, is the last one.
  switch (argc) {
  case 1:
    offset = 0;
    break;

  case 2:
    offset = 1;
    break;

  default:
    rb_raise(rb_eArgError, "Need an initial value or a dtype.");
    break;
  }

  if (SYMBOL_P(argv[offset])) {
    return nm_dtype_from_rbsymbol(argv[offset]);

  } else if (RB_TYPE_P(argv[offset], T_STRING)) {
    return nm_dtype_from_rbstring(StringValue(argv[offset]));

  } else if (stype == nm::YALE_STORE) {
    rb_raise(rb_eArgError, "Yale storage class requires a dtype.");

  } else {
    // No explicit dtype: guess it from the initial value.
    return nm_dtype_guess(argv[0]);
  }
}

/*
 * Convert a Ruby value or an array of Ruby values into initial C values.
*/ static void* interpret_initial_value(VALUE arg, nm::dtype_t dtype) { NM_CONSERVATIVE(nm_register_value(&arg)); unsigned int index; void* init_val; if (RB_TYPE_P(arg, T_ARRAY)) { // Array init_val = NM_ALLOC_N(char, DTYPE_SIZES[dtype] * RARRAY_LEN(arg)); NM_CHECK_ALLOC(init_val); for (index = 0; index < RARRAY_LEN(arg); ++index) { rubyval_to_cval(RARRAY_AREF(arg, index), dtype, (char*)init_val + (index * DTYPE_SIZES[dtype])); } } else { // Single value init_val = rubyobj_to_cval(arg, dtype); } NM_CONSERVATIVE(nm_unregister_value(&arg)); return init_val; } /* * Convert the shape argument, which may be either a Ruby value or an array of * Ruby values, into C values. The second argument is where the dimensionality * of the matrix will be stored. The function itself returns a pointer to the * array describing the shape, which must be freed manually. */ static size_t* interpret_shape(VALUE arg, size_t* dim) { NM_CONSERVATIVE(nm_register_value(&arg)); size_t* shape; if (RB_TYPE_P(arg, T_ARRAY)) { *dim = RARRAY_LEN(arg); shape = NM_ALLOC_N(size_t, *dim); for (size_t index = 0; index < *dim; ++index) { shape[index] = FIX2UINT( RARRAY_AREF(arg, index) ); } } else if (FIXNUM_P(arg)) { *dim = 2; shape = NM_ALLOC_N(size_t, *dim); shape[0] = FIX2UINT(arg); shape[1] = FIX2UINT(arg); } else { nm_unregister_value(&arg); rb_raise(rb_eArgError, "Expected an array of numbers or a single Fixnum for matrix shape"); } NM_CONSERVATIVE(nm_unregister_value(&arg)); return shape; } /* * Convert a Ruby symbol or string into an storage type. 
*/ static nm::stype_t interpret_stype(VALUE arg) { if (SYMBOL_P(arg)) { return nm_stype_from_rbsymbol(arg); } else if (RB_TYPE_P(arg, T_STRING)) { return nm_stype_from_rbstring(StringValue(arg)); } else { rb_raise(rb_eArgError, "Expected storage type"); } } ////////////////// // Math Helpers // ////////////////// STORAGE* matrix_storage_cast_alloc(NMATRIX* matrix, nm::dtype_t new_dtype) { if (matrix->storage->dtype == new_dtype && !is_ref(matrix)) return matrix->storage; CAST_TABLE(cast_copy_storage); return cast_copy_storage[matrix->stype][matrix->stype](matrix->storage, new_dtype, NULL); } STORAGE_PAIR binary_storage_cast_alloc(NMATRIX* left_matrix, NMATRIX* right_matrix) { nm_register_nmatrix(left_matrix); nm_register_nmatrix(right_matrix); STORAGE_PAIR casted; nm::dtype_t new_dtype = Upcast[left_matrix->storage->dtype][right_matrix->storage->dtype]; casted.left = matrix_storage_cast_alloc(left_matrix, new_dtype); nm_register_storage(left_matrix->stype, casted.left); casted.right = matrix_storage_cast_alloc(right_matrix, new_dtype); nm_unregister_nmatrix(left_matrix); nm_unregister_nmatrix(right_matrix); nm_unregister_storage(left_matrix->stype, casted.left); return casted; } static VALUE matrix_multiply_scalar(NMATRIX* left, VALUE scalar) { rb_raise(rb_eNotImpError, "matrix-scalar multiplication not implemented yet"); return Qnil; } static VALUE matrix_multiply(NMATRIX* left, NMATRIX* right) { nm_register_nmatrix(left); nm_register_nmatrix(right); ///TODO: multiplication for non-dense and/or non-decimal matrices // Make sure both of our matrices are of the correct type. STORAGE_PAIR casted = binary_storage_cast_alloc(left, right); nm_register_storage(left->stype, casted.left); nm_register_storage(right->stype, casted.right); size_t* resulting_shape = NM_ALLOC_N(size_t, 2); resulting_shape[0] = left->storage->shape[0]; resulting_shape[1] = right->storage->shape[1]; // Sometimes we only need to use matrix-vector multiplication (e.g., GEMM versus GEMV). Find out. 
bool vector = false; if (resulting_shape[1] == 1) vector = true; static STORAGE* (*storage_matrix_multiply[nm::NUM_STYPES])(const STORAGE_PAIR&, size_t*, bool) = { nm_dense_storage_matrix_multiply, nm_list_storage_matrix_multiply, nm_yale_storage_matrix_multiply }; STORAGE* resulting_storage = storage_matrix_multiply[left->stype](casted, resulting_shape, vector); NMATRIX* result = nm_create(left->stype, resulting_storage); nm_register_nmatrix(result); // Free any casted-storage we created for the multiplication. // TODO: Can we make the Ruby GC take care of this stuff now that we're using it? // If we did that, we night not have to re-create these every time, right? Or wrong? Need to do // more research. static void (*free_storage[nm::NUM_STYPES])(STORAGE*) = { nm_dense_storage_delete, nm_list_storage_delete, nm_yale_storage_delete }; nm_unregister_storage(left->stype, casted.left); if (left->storage != casted.left) free_storage[result->stype](casted.left); nm_unregister_storage(right->stype, casted.right); if (right->storage != casted.right) free_storage[result->stype](casted.right); VALUE to_return = result ? Data_Wrap_Struct(cNMatrix, nm_mark, nm_delete, result) : Qnil; // Only if we try to multiply list matrices should we return Qnil. nm_unregister_nmatrix(left); nm_unregister_nmatrix(right); nm_unregister_nmatrix(result); return to_return; } /* * Reduce a matrix to hessenberg form. * * == Arguments * * a - The NMatrix to be reduced. This matrix is replaced with the hessenberg form. * * == Notes * * LAPACK free. */ static VALUE nm_hessenberg(VALUE self, VALUE a) { nm_math_hessenberg(a); return a; } /* * Calculate the inverse of a matrix with in-place Gauss-Jordan elimination. * Inverse will fail if the largest element in any column in zero. * * LAPACK free. 
*/ static VALUE nm_inverse(VALUE self, VALUE inverse, VALUE bang) { if (NM_STYPE(self) != nm::DENSE_STORE) { rb_raise(rb_eNotImpError, "needs exact determinant implementation for this matrix stype"); return Qnil; } if (NM_DIM(self) != 2 || NM_SHAPE0(self) != NM_SHAPE1(self)) { rb_raise(nm_eShapeError, "matrices must be square to have an inverse defined"); return Qnil; } if (bang == Qtrue) { nm_math_inverse(NM_SHAPE0(self), NM_STORAGE_DENSE(self)->elements, NM_DTYPE(self)); return self; } nm_math_inverse(NM_SHAPE0(inverse), NM_STORAGE_DENSE(inverse)->elements, NM_DTYPE(inverse)); return inverse; } /* * Calculate the exact inverse of a 2x2 or 3x3 matrix. * * Does not test for invertibility! */ static VALUE nm_inverse_exact(VALUE self, VALUE inverse, VALUE lda, VALUE ldb) { if (NM_DIM(self) != 2 || NM_SHAPE0(self) != NM_SHAPE1(self)) { rb_raise(nm_eShapeError, "matrices must be square to have an inverse defined"); return Qnil; } nm::dtype_t dtype = NM_DTYPE(self); void* result = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]); if (dtype == nm::RUBYOBJ) { nm_register_values(reinterpret_cast(result), 1); } nm::stype_t old_stype = NM_STYPE(self); if (old_stype == nm::LIST_STORE) { self = nm_cast_with_types(self, nm::YALE_STORE, dtype, result); inverse = nm_cast_with_types(inverse, nm::YALE_STORE, dtype, result); } if (NM_STYPE(self) == nm::DENSE_STORE) { nm_math_inverse_exact_from_dense(NM_SHAPE0(self), NM_STORAGE_DENSE(self)->elements, FIX2INT(lda), NM_STORAGE_DENSE(inverse)->elements, FIX2INT(ldb), dtype); } else { nm_math_inverse_exact_from_yale(NM_SHAPE0(self), NM_STORAGE_YALE(self), FIX2INT(lda), NM_STORAGE_YALE(inverse), FIX2INT(ldb), dtype); } if (old_stype == nm::LIST_STORE) { inverse = nm_cast_with_types(inverse, nm::LIST_STORE, dtype, result); } if (dtype == nm::RUBYOBJ) { nm_unregister_values(reinterpret_cast(result), 1); } return inverse; } /* * Calculate the exact determinant of a dense matrix. 
* * Returns nil for dense matrices which are not square or number of dimensions other than 2. * * Note: Currently only implemented for 2x2 and 3x3 matrices. */ static VALUE nm_det_exact(VALUE self) { if (NM_DIM(self) != 2 || NM_SHAPE0(self) != NM_SHAPE1(self)) { rb_raise(nm_eShapeError, "matrices must be square to have a determinant defined"); return Qnil; } nm::dtype_t dtype = NM_DTYPE(self); void* result = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]); if (NM_STYPE(self) == nm::LIST_STORE) { self = nm_cast_with_types(self, nm::YALE_STORE, dtype, result); } NM_CONSERVATIVE(nm_register_value(&self)); // Calculate the determinant and then assign it to the return value if (NM_STYPE(self) == nm::DENSE_STORE) { nm_math_det_exact_from_dense(NM_SHAPE0(self), NM_STORAGE_DENSE(self)->elements, NM_SHAPE0(self), NM_DTYPE(self), result); } else { nm_math_det_exact_from_yale(NM_SHAPE0(self), NM_STORAGE_YALE(self), NM_SHAPE0(self), NM_DTYPE(self), result); } VALUE to_return; if (dtype == nm::RUBYOBJ) { to_return = *reinterpret_cast(result); } else { to_return = nm::rubyobj_from_cval(result, NM_DTYPE(self)).rval; } NM_CONSERVATIVE(nm_unregister_value(&self)); return to_return; } /* * Returns the pointer to the matrix storage's data. This is useful primarily when you are using FFI with NMatrix -- * say, for example, you want to pass a float* to some function, and your NMatrix is a :float32 :dense matrix. Then you * can call this function and get that pointer directly instead of copying the data. */ static VALUE nm_data_pointer(VALUE self) { //if (NM_DTYPE(self) == nm::LIST_STORE) // rb_warn("pointer requested for list storage, which may be meaningless"); // This is actually pretty easy, since all of the storage types have their elements positioned in the same place // relative to one another. So yes, believe it or not, this should work just as well for Yale or list storage as for // dense. 
return INT2FIX(NM_STORAGE_DENSE(self)->elements); } ///////////////// // Exposed API // ///////////////// /* * Create a dense matrix. Used by the NMatrix GSL fork. Unlike nm_create, this one copies all of the * arrays and such passed in -- so you don't have to allocate and pass a new shape object for every * matrix you want to create, for example. Same goes for elements. * * Returns a properly-wrapped Ruby object as a VALUE. * * *** Note that this function is for API only. Please do not use it internally. * * TODO: Add a column-major option for libraries that use column-major matrices. */ VALUE rb_nmatrix_dense_create(nm::dtype_t dtype, size_t* shape, size_t dim, void* elements, size_t length) { if (dtype == nm::RUBYOBJ) { nm_register_values(reinterpret_cast(elements), length); } NMATRIX* nm; size_t nm_dim; size_t* shape_copy; // Do not allow a dim of 1. Treat it as a column or row matrix. if (dim == 1) { nm_dim = 2; shape_copy = NM_ALLOC_N(size_t, nm_dim); shape_copy[0] = shape[0]; shape_copy[1] = 1; } else { nm_dim = dim; shape_copy = NM_ALLOC_N(size_t, nm_dim); memcpy(shape_copy, shape, sizeof(size_t)*nm_dim); } // Copy elements void* elements_copy = NM_ALLOC_N(char, DTYPE_SIZES[dtype]*length); memcpy(elements_copy, elements, DTYPE_SIZES[dtype]*length); // allocate and create the matrix and its storage nm = nm_create(nm::DENSE_STORE, nm_dense_storage_create(dtype, shape_copy, dim, elements_copy, length)); nm_register_nmatrix(nm); VALUE to_return = Data_Wrap_Struct(cNMatrix, nm_mark, nm_delete, nm); nm_unregister_nmatrix(nm); if (dtype == nm::RUBYOBJ) { nm_unregister_values(reinterpret_cast(elements), length); } // tell Ruby about the matrix and its storage, particularly how to garbage collect it. return to_return; } /* * Create a dense vector. Used by the NMatrix GSL fork. * * Basically just a convenience wrapper for rb_nmatrix_dense_create(). * * Returns a properly-wrapped Ruby NMatrix object as a VALUE. 
Included for backwards compatibility * for when NMatrix had an NVector class. */ VALUE rb_nvector_dense_create(nm::dtype_t dtype, void* elements, size_t length) { size_t dim = 1, shape = length; return rb_nmatrix_dense_create(dtype, &shape, dim, elements, length); } ================================================ FILE: ext/nmatrix/storage/common.cpp ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == common.cpp // // Code for the STORAGE struct that is common to all storage types. /* * Standard Includes */ /* * Project Includes */ #include "common.h" /* * Macros */ /* * Global Variables */ /* * Forward Declarations */ /* * Functions */ extern "C" { /* * Calculate the number of elements in the dense storage structure, based on * shape and dim. */ size_t nm_storage_count_max_elements(const STORAGE* storage) { unsigned int i; size_t count = 1; for (i = storage->dim; i-- > 0;) { count *= storage->shape[i]; } return count; } // Helper function used only for the RETURN_SIZED_ENUMERATOR macro. Returns the length of // the matrix's storage. 
VALUE nm_enumerator_length(VALUE nmatrix) { long len = nm_storage_count_max_elements(NM_STORAGE_DENSE(nmatrix)); return LONG2NUM(len); } } // end of extern "C" block ================================================ FILE: ext/nmatrix/storage/common.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == common.h // // Header file for code common to all storage types. #ifndef STORAGE_COMMON_H #define STORAGE_COMMON_H /* * Standard Includes */ #include #include // pow(). #include /* * Project Includes */ #include "data/data.h" #include "nmatrix.h" /* * Macros */ #define u_int8_t static_assert(false, "Please use uint8_t for cross-platform support and consistency."); uint8_t #define u_int16_t static_assert(false, "Please use uint16_t for cross-platform support and consistency."); uint16_t #define u_int32_t static_assert(false, "Please use uint32_t for cross-platform support and consistency."); uint32_t #define u_int64_t static_assert(false, "Please use uint64_t for cross-platform support and consistency."); uint64_t extern "C" { /* * Types */ // For binary operations involving matrices that need to be casted. 
struct STORAGE_PAIR { STORAGE* left; STORAGE* right; }; struct SLICE { size_t* coords; // Coordinate of first element size_t* lengths; // Lengths of slice bool single; // true if all lengths equal to 1 (represents single matrix element) }; /* * Data */ /* * Functions */ size_t nm_storage_count_max_elements(const STORAGE* storage); VALUE nm_enumerator_length(VALUE nmatrix); } // end of extern "C" block namespace nm { /* * Templated helper function for element-wise operations, used by dense, yale, and list. */ template inline VALUE ew_op_switch(LDType left, RDType right) { switch (op) { case EW_ADD: return RubyObject(left + right).rval; case EW_SUB: return RubyObject(left - right).rval; case EW_MUL: return RubyObject(left * right).rval; case EW_DIV: return RubyObject(left / right).rval; case EW_POW: return RubyObject(pow(left, right)).rval; case EW_MOD: rb_raise(rb_eNotImpError, "Element-wise modulo is currently not supported."); break; default: rb_raise(rb_eStandardError, "This should not happen."); } return Qnil; } #define EWOP_INT_INT_DIV(ltype, rtype) template <> \ inline VALUE ew_op_switch( ltype left, rtype right) { \ if (right == 0) rb_raise(rb_eZeroDivError, "cannot divide type by 0, would throw SIGFPE"); \ if ((left > 0 && right > 0) || (left < 0 && right < 0)) \ return left / right; \ else \ return ( ltype )(std::floor((double)(left) / (double)(right))); \ } #define EWOP_UINT_UINT_DIV(ltype, rtype) template <> \ inline VALUE ew_op_switch( ltype left, rtype right) { \ if (right == 0) rb_raise(rb_eZeroDivError, "cannot divide type by 0, would throw SIGFPE"); \ return left / right; \ } #define EWOP_INT_UINT_DIV(ltype, rtype) template <> \ inline VALUE ew_op_switch( ltype left, rtype right) { \ if (right == 0) rb_raise(rb_eZeroDivError, "cannot divide type by 0, would throw SIGFPE"); \ if (left > 0 ) return left / right; \ else return ( ltype )(std::floor((double)(left) / (double)(right))); \ } #define EWOP_UINT_INT_DIV(ltype, rtype) template <> \ inline VALUE 
ew_op_switch( ltype left, rtype right) { \ if (right == 0) rb_raise(rb_eZeroDivError, "cannot divide type by 0, would throw SIGFPE"); \ if (right > 0) return left / right; \ else return ( ltype )(std::floor((double)(left) / (double)(right))); \ } #define EWOP_FLOAT_INT_DIV(ltype, rtype) template <> \ inline VALUE ew_op_switch( ltype left, rtype right) { \ return left / (ltype)(right); \ } // Ensure that divisions are done in the Ruby way, and that (int)x/0 always raises a Ruby error instead // of throwing a SIGFPE. EWOP_INT_INT_DIV(int64_t, int64_t) EWOP_INT_INT_DIV(int32_t, int32_t) EWOP_INT_INT_DIV(int32_t, int64_t) EWOP_INT_INT_DIV(int16_t, int16_t) EWOP_INT_INT_DIV(int16_t, int32_t) EWOP_INT_INT_DIV(int16_t, int64_t) EWOP_INT_INT_DIV(int8_t, int8_t) EWOP_INT_UINT_DIV(int8_t, uint8_t) EWOP_INT_INT_DIV(int8_t, int16_t) EWOP_INT_INT_DIV(int8_t, int32_t) EWOP_INT_INT_DIV(int8_t, int64_t) EWOP_UINT_UINT_DIV(uint8_t, uint8_t) EWOP_UINT_INT_DIV(uint8_t, int8_t) EWOP_UINT_INT_DIV(uint8_t, int16_t) EWOP_UINT_INT_DIV(uint8_t, int32_t) EWOP_UINT_INT_DIV(uint8_t, int64_t) EWOP_FLOAT_INT_DIV(float, int8_t) EWOP_FLOAT_INT_DIV(float, uint8_t) EWOP_FLOAT_INT_DIV(float, int16_t) EWOP_FLOAT_INT_DIV(float, int32_t) EWOP_FLOAT_INT_DIV(float, int64_t) EWOP_FLOAT_INT_DIV(double, int8_t) EWOP_FLOAT_INT_DIV(double, uint8_t) EWOP_FLOAT_INT_DIV(double, int16_t) EWOP_FLOAT_INT_DIV(double, int32_t) EWOP_FLOAT_INT_DIV(double, int64_t) } #endif // STORAGE_COMMON_H ================================================ FILE: ext/nmatrix/storage/dense/dense.cpp ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. 
// // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == dense.c // // Dense n-dimensional matrix storage. /* * Standard Includes */ #include /* * Project Includes */ #include "../../data/data.h" #include "../../math/long_dtype.h" #include "../../math/gemm.h" #include "../../math/gemv.h" #include "../../math/math.h" #include "../common.h" #include "dense.h" /* * Macros */ /* * Global Variables */ /* * Forward Declarations */ namespace nm { namespace dense_storage { template void ref_slice_copy_transposed(const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs); template DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, nm::dtype_t new_dtype); template bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right); template static DENSE_STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector); template bool is_hermitian(const DENSE_STORAGE* mat, int lda); template bool is_symmetric(const DENSE_STORAGE* mat, int lda); /* * Recursive slicing for N-dimensional matrix. 
*/ template static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* lengths, size_t pdest, size_t psrc, size_t n) { if (src->dim - n > 1) { for (size_t i = 0; i < lengths[n]; ++i) { slice_copy(dest, src, lengths, pdest + dest->stride[n]*i, psrc + src->stride[n]*i, n + 1); } } else { for (size_t p = 0; p < dest->shape[n]; ++p) { reinterpret_cast(dest->elements)[p+pdest] = reinterpret_cast(src->elements)[p+psrc]; } /*memcpy((char*)dest->elements + pdest*DTYPE_SIZES[dest->dtype], (char*)src->elements + psrc*DTYPE_SIZES[src->dtype], dest->shape[n]*DTYPE_SIZES[dest->dtype]); */ } } /* * Recursive function, sets multiple values in a matrix from a single source value. Same basic pattern as slice_copy. */ template static void slice_set(DENSE_STORAGE* dest, size_t* lengths, size_t pdest, size_t rank, D* const v, size_t v_size, size_t& v_offset) { if (dest->dim - rank > 1) { for (size_t i = 0; i < lengths[rank]; ++i) { slice_set(dest, lengths, pdest + dest->stride[rank] * i, rank + 1, v, v_size, v_offset); } } else { for (size_t p = 0; p < lengths[rank]; ++p, ++v_offset) { if (v_offset >= v_size) v_offset %= v_size; D* elem = reinterpret_cast(dest->elements); elem[p + pdest] = v[v_offset]; } } } /* * Dense storage set/slice-set function, templated version. */ template void set(VALUE left, SLICE* slice, VALUE right) { NM_CONSERVATIVE(nm_register_value(&left)); NM_CONSERVATIVE(nm_register_value(&right)); DENSE_STORAGE* s = NM_STORAGE_DENSE(left); std::pair nm_and_free = interpret_arg_as_dense_nmatrix(right, s->dtype); // Map the data onto D* v. 
D* v; size_t v_size = 1; if (nm_and_free.first) { DENSE_STORAGE* t = reinterpret_cast(nm_and_free.first->storage); v = reinterpret_cast(t->elements); v_size = nm_storage_count_max_elements(t); } else if (RB_TYPE_P(right, T_ARRAY)) { v_size = RARRAY_LEN(right); v = NM_ALLOC_N(D, v_size); if (s->dtype == nm::RUBYOBJ) nm_register_values(reinterpret_cast(v), v_size); for (size_t m = 0; m < v_size; ++m) { rubyval_to_cval(rb_ary_entry(right, m), s->dtype, &(v[m])); } } else { v = reinterpret_cast(rubyobj_to_cval(right, NM_DTYPE(left))); if (s->dtype == nm::RUBYOBJ) nm_register_values(reinterpret_cast(v), v_size); } if (slice->single) { reinterpret_cast(s->elements)[nm_dense_storage_pos(s, slice->coords)] = *v; } else { size_t v_offset = 0; slice_set(s, slice->lengths, nm_dense_storage_pos(s, slice->coords), 0, v, v_size, v_offset); } // Only free v if it was allocated in this function. if (nm_and_free.first) { if (nm_and_free.second) { nm_delete(nm_and_free.first); } } else { if (s->dtype == nm::RUBYOBJ) nm_unregister_values(reinterpret_cast(v), v_size); NM_FREE(v); } NM_CONSERVATIVE(nm_unregister_value(&left)); NM_CONSERVATIVE(nm_unregister_value(&right)); } }} // end of namespace nm::dense_storage extern "C" { static size_t* stride(size_t* shape, size_t dim); static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* lengths, size_t pdest, size_t psrc, size_t n); /* * Functions */ /////////////// // Lifecycle // /////////////// /* * This creates a dummy with all the properties of dense storage, but no actual elements allocation. * * elements will be NULL when this function finishes. You can clean up with nm_dense_storage_delete, which will * check for that NULL pointer before freeing elements. 
*/ static DENSE_STORAGE* nm_dense_storage_create_dummy(nm::dtype_t dtype, size_t* shape, size_t dim) { DENSE_STORAGE* s = NM_ALLOC( DENSE_STORAGE ); s->dim = dim; s->shape = shape; s->dtype = dtype; s->offset = NM_ALLOC_N(size_t, dim); memset(s->offset, 0, sizeof(size_t)*dim); s->stride = stride(shape, dim); s->count = 1; s->src = s; s->elements = NULL; return s; } /* * Note that elements and elements_length are for initial value(s) passed in. * If they are the correct length, they will be used directly. If not, they * will be concatenated over and over again into a new elements array. If * elements is NULL, the new elements array will not be initialized. */ DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t dim, void* elements, size_t elements_length) { if (dtype == nm::RUBYOBJ) nm_register_values(reinterpret_cast(elements), elements_length); DENSE_STORAGE* s = nm_dense_storage_create_dummy(dtype, shape, dim); size_t count = nm_storage_count_max_elements(s); if (elements_length == count) { s->elements = elements; if (dtype == nm::RUBYOBJ) nm_unregister_values(reinterpret_cast(elements), elements_length); } else { s->elements = NM_ALLOC_N(char, DTYPE_SIZES[dtype]*count); if (dtype == nm::RUBYOBJ) nm_unregister_values(reinterpret_cast(elements), elements_length); size_t copy_length = elements_length; if (elements_length > 0) { // Repeat elements over and over again until the end of the matrix. for (size_t i = 0; i < count; i += elements_length) { if (i + elements_length > count) { copy_length = count - i; } memcpy((char*)(s->elements)+i*DTYPE_SIZES[dtype], (char*)(elements)+(i % elements_length)*DTYPE_SIZES[dtype], copy_length*DTYPE_SIZES[dtype]); } // Get rid of the init_val. NM_FREE(elements); } } return s; } /* * Destructor for dense storage. Make sure when you update this you also update nm_dense_storage_delete_dummy. 
*/ void nm_dense_storage_delete(STORAGE* s) { // Sometimes Ruby passes in NULL storage for some reason (probably on copy construction failure). if (s) { DENSE_STORAGE* storage = (DENSE_STORAGE*)s; if(storage->count-- == 1) { NM_FREE(storage->shape); NM_FREE(storage->offset); NM_FREE(storage->stride); if (storage->elements != NULL) {// happens with dummy objects NM_FREE(storage->elements); } NM_FREE(storage); } } } /* * Destructor for dense storage references (slicing). */ void nm_dense_storage_delete_ref(STORAGE* s) { // Sometimes Ruby passes in NULL storage for some reason (probably on copy construction failure). if (s) { DENSE_STORAGE* storage = (DENSE_STORAGE*)s; nm_dense_storage_delete( reinterpret_cast(storage->src) ); NM_FREE(storage->shape); NM_FREE(storage->offset); NM_FREE(storage); } } /* * Mark values in a dense matrix for garbage collection. This may not be necessary -- further testing required. */ void nm_dense_storage_mark(STORAGE* storage_base) { DENSE_STORAGE* storage = (DENSE_STORAGE*)storage_base; if (storage && storage->dtype == nm::RUBYOBJ) { VALUE* els = reinterpret_cast(storage->elements); if (els) { rb_gc_mark_locations(els, &(els[nm_storage_count_max_elements(storage)-1])); } //for (size_t index = nm_storage_count_max_elements(storage); index-- > 0;) { // rb_gc_mark(els[index]); //} } } /** * Register a dense storage struct as in-use to avoid garbage collection of the * elements stored. * * This function will check dtype and ignore non-object dtype, so its safe to pass any dense storage in. * */ void nm_dense_storage_register(const STORAGE* s) { const DENSE_STORAGE* storage = reinterpret_cast(s); if (storage->dtype == nm::RUBYOBJ && storage->elements) { nm_register_values(reinterpret_cast(storage->elements), nm_storage_count_max_elements(storage)); } } /** * Unregister a dense storage struct to allow normal garbage collection of the * elements stored. 
* * This function will check dtype and ignore non-object dtype, so its safe to pass any dense storage in. * */ void nm_dense_storage_unregister(const STORAGE* s) { const DENSE_STORAGE* storage = reinterpret_cast(s); if (storage->dtype == nm::RUBYOBJ && storage->elements) { nm_unregister_values(reinterpret_cast(storage->elements), nm_storage_count_max_elements(storage)); } } /////////////// // Accessors // /////////////// /* * map_pair iterator for dense matrices (for element-wise operations) */ VALUE nm_dense_map_pair(VALUE self, VALUE right) { NM_CONSERVATIVE(nm_register_value(&self)); NM_CONSERVATIVE(nm_register_value(&right)); RETURN_SIZED_ENUMERATOR_PRE NM_CONSERVATIVE(nm_unregister_value(&right)); NM_CONSERVATIVE(nm_unregister_value(&self)); RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length); DENSE_STORAGE *s = NM_STORAGE_DENSE(self), *t = NM_STORAGE_DENSE(right); size_t* coords = NM_ALLOCA_N(size_t, s->dim); memset(coords, 0, sizeof(size_t) * s->dim); size_t *shape_copy = NM_ALLOC_N(size_t, s->dim); memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim); size_t count = nm_storage_count_max_elements(s); DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0); VALUE* result_elem = reinterpret_cast(result->elements); nm_dense_storage_register(result); for (size_t k = 0; k < count; ++k) { nm_dense_storage_coords(result, k, coords); size_t s_index = nm_dense_storage_pos(s, coords), t_index = nm_dense_storage_pos(t, coords); VALUE sval = NM_DTYPE(self) == nm::RUBYOBJ ? reinterpret_cast(s->elements)[s_index] : nm::rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval; nm_register_value(&sval); VALUE tval = NM_DTYPE(right) == nm::RUBYOBJ ? 
reinterpret_cast(t->elements)[t_index] :
                 nm::rubyobj_from_cval((char*)(t->elements) + t_index*DTYPE_SIZES[NM_DTYPE(right)], NM_DTYPE(right)).rval;
    result_elem[k] = rb_yield_values(2, sval, tval);
    nm_unregister_value(&sval);
  }

  VALUE klass = CLASS_OF(self);

  // Wrap the result storage in an NMATRIX and hand ownership to Ruby; keep it
  // registered until Data_Wrap_Struct has taken it so GC cannot reap it mid-flight.
  NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast(result));
  nm_register_nmatrix(m);
  VALUE to_return = Data_Wrap_Struct(klass, nm_mark, nm_delete, m);
  nm_unregister_nmatrix(m);
  nm_dense_storage_unregister(result);
  NM_CONSERVATIVE(nm_unregister_value(&self));
  NM_CONSERVATIVE(nm_unregister_value(&right));

  return to_return;
}

/*
 * map enumerator for dense matrices.
 *
 * Yields each element of self to the block (elements are converted to Ruby
 * VALUEs unless the matrix already stores RUBYOBJ) and collects the block's
 * results into a new RUBYOBJ dense matrix of the same shape, which is
 * returned wrapped in self's class.
 */
VALUE nm_dense_map(VALUE self) {
  NM_CONSERVATIVE(nm_register_value(&self));

  // Return a sized Enumerator when no block is given.
  RETURN_SIZED_ENUMERATOR_PRE
  NM_CONSERVATIVE(nm_unregister_value(&self));
  RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length);

  DENSE_STORAGE *s = NM_STORAGE_DENSE(self);

  // Scratch coordinate buffer, reused for every element.
  size_t* coords = NM_ALLOCA_N(size_t, s->dim);
  memset(coords, 0, sizeof(size_t) * s->dim);

  // The result storage takes ownership of this shape copy.
  size_t *shape_copy = NM_ALLOC_N(size_t, s->dim);
  memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);

  size_t count = nm_storage_count_max_elements(s);

  DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0);

  VALUE* result_elem = reinterpret_cast(result->elements);
  nm_dense_storage_register(result);

  for (size_t k = 0; k < count; ++k) {
    // Map the result's linear index k back to coordinates, then into s's
    // linear position (s may be a reference/slice with its own offsets).
    nm_dense_storage_coords(result, k, coords);
    size_t s_index = nm_dense_storage_pos(s, coords);

    result_elem[k] = rb_yield(NM_DTYPE(self) == nm::RUBYOBJ ?
        reinterpret_cast(s->elements)[s_index] :
        nm::rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval);
  }

  VALUE klass = CLASS_OF(self);

  NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast(result));
  nm_register_nmatrix(m);
  VALUE to_return = Data_Wrap_Struct(klass, nm_mark, nm_delete, m);
  nm_unregister_nmatrix(m);
  nm_dense_storage_unregister(result);

  NM_CONSERVATIVE(nm_unregister_value(&self));

  return to_return;
}

/*
 * each_with_indices iterator for dense matrices.
 *
 * For every element (in storage order) yields a Ruby Array of the form
 * [value, i0, i1, ..., iN-1]: the element followed by its coordinates.
 * Returns the matrix itself.
 */
VALUE nm_dense_each_with_indices(VALUE nmatrix) {
  NM_CONSERVATIVE(nm_register_value(&nmatrix));

  RETURN_SIZED_ENUMERATOR_PRE
  NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
  RETURN_SIZED_ENUMERATOR(nmatrix, 0, 0, nm_enumerator_length); // fourth argument only used by Ruby2+

  DENSE_STORAGE* s = NM_STORAGE_DENSE(nmatrix);

  // Create indices and initialize them to zero
  size_t* coords = NM_ALLOCA_N(size_t, s->dim);
  memset(coords, 0, sizeof(size_t) * s->dim);

  size_t slice_index;

  // A dummy storage with s's shape is used to translate linear indices into
  // coordinates independent of s's slice offsets.
  size_t* shape_copy = NM_ALLOC_N(size_t, s->dim);
  memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);

  DENSE_STORAGE* sliced_dummy = nm_dense_storage_create_dummy(s->dtype, shape_copy, s->dim);

  for (size_t k = 0; k < nm_storage_count_max_elements(s); ++k) {
    nm_dense_storage_coords(sliced_dummy, k, coords);
    slice_index = nm_dense_storage_pos(s, coords);
    VALUE ary = rb_ary_new();
    nm_register_value(&ary);
    if (NM_DTYPE(nmatrix) == nm::RUBYOBJ)
      rb_ary_push(ary, reinterpret_cast(s->elements)[slice_index]);
    else
      rb_ary_push(ary, nm::rubyobj_from_cval((char*)(s->elements) + slice_index*DTYPE_SIZES[NM_DTYPE(nmatrix)], NM_DTYPE(nmatrix)).rval);

    for (size_t p = 0; p < s->dim; ++p) {
      rb_ary_push(ary, INT2FIX(coords[p]));
    }

    // yield the array which now consists of the value and the indices
    rb_yield(ary);
    nm_unregister_value(&ary);
  }

  nm_dense_storage_delete(sliced_dummy);

  NM_CONSERVATIVE(nm_unregister_value(&nmatrix));

  return nmatrix;
}

/*
 * Borrowed this function from NArray. Handles 'each' iteration on a dense
 * matrix.
 *
 * Additionally, handles separately matrices containing VALUEs and matrices
 * containing other types of data.
 */
VALUE nm_dense_each(VALUE nmatrix) {
  NM_CONSERVATIVE(nm_register_value(&nmatrix));

  RETURN_SIZED_ENUMERATOR_PRE
  NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
  RETURN_SIZED_ENUMERATOR(nmatrix, 0, 0, nm_enumerator_length);

  DENSE_STORAGE* s = NM_STORAGE_DENSE(nmatrix);

  size_t* temp_coords = NM_ALLOCA_N(size_t, s->dim);
  size_t sliced_index;

  // Dummy storage translates linear indices to coordinates without s's offsets.
  size_t* shape_copy = NM_ALLOC_N(size_t, s->dim);
  memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);

  DENSE_STORAGE* sliced_dummy = nm_dense_storage_create_dummy(s->dtype, shape_copy, s->dim);

  if (NM_DTYPE(nmatrix) == nm::RUBYOBJ) { // matrix of Ruby objects -- yield those objects directly
    for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i) {
      nm_dense_storage_coords(sliced_dummy, i, temp_coords);
      sliced_index = nm_dense_storage_pos(s, temp_coords);
      rb_yield( reinterpret_cast(s->elements)[sliced_index] );
    }
  } else {
    // We're going to copy the matrix element into a Ruby VALUE and then operate on it. This way user can't accidentally
    // modify it and cause a seg fault.
    for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i) {
      nm_dense_storage_coords(sliced_dummy, i, temp_coords);
      sliced_index = nm_dense_storage_pos(s, temp_coords);
      VALUE v = nm::rubyobj_from_cval((char*)(s->elements) + sliced_index*DTYPE_SIZES[NM_DTYPE(nmatrix)], NM_DTYPE(nmatrix)).rval;
      rb_yield( v ); // yield to the copy we made
    }
  }

  nm_dense_storage_delete(sliced_dummy);

  NM_CONSERVATIVE(nm_unregister_value(&nmatrix));

  return nmatrix;
}

/*
 * Non-templated version of nm::dense_storage::slice_copy. Dispatches on the
 * (dest, src) dtype pair to the templated implementation.
 */
static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* lengths, size_t pdest, size_t psrc, size_t n) {
  NAMED_LR_DTYPE_TEMPLATE_TABLE(slice_copy_table, nm::dense_storage::slice_copy, void, DENSE_STORAGE*, const DENSE_STORAGE*, size_t*, size_t, size_t, size_t)

  slice_copy_table[dest->dtype][src->dtype](dest, src, lengths, pdest, psrc, n);
}

/*
 * Get a slice or one element, using copying.
 *
 * Single-element case: returns a raw pointer into the element buffer.
 * Slice case: returns a freshly created DENSE_STORAGE holding a copy.
 *
 * FIXME: Template the first condition.
 */
void* nm_dense_storage_get(const STORAGE* storage, SLICE* slice) {
  DENSE_STORAGE* s = (DENSE_STORAGE*)storage;

  if (slice->single)
    return (char*)(s->elements) + nm_dense_storage_pos(s, slice->coords) * DTYPE_SIZES[s->dtype];
  else {
    nm_dense_storage_register(s);
    size_t *shape = NM_ALLOC_N(size_t, s->dim);

    for (size_t i = 0; i < s->dim; ++i) {
      shape[i] = slice->lengths[i];
    }

    DENSE_STORAGE* ns = nm_dense_storage_create(s->dtype, shape, s->dim, NULL, 0);

    // Copy out of the ultimate source storage, starting at the slice's
    // absolute position.
    slice_copy(ns, reinterpret_cast(s->src), slice->lengths, 0, nm_dense_storage_pos(s, slice->coords), 0);

    nm_dense_storage_unregister(s);

    return ns;
  }
}

/*
 * Get a slice or one element by reference (no copy).
 *
 * FIXME: Template the first condition.
*/ void* nm_dense_storage_ref(const STORAGE* storage, SLICE* slice) { DENSE_STORAGE* s = (DENSE_STORAGE*)storage; if (slice->single) return (char*)(s->elements) + nm_dense_storage_pos(s, slice->coords) * DTYPE_SIZES[s->dtype]; else { nm_dense_storage_register(s); DENSE_STORAGE* ns = NM_ALLOC( DENSE_STORAGE ); ns->dim = s->dim; ns->dtype = s->dtype; ns->offset = NM_ALLOC_N(size_t, ns->dim); ns->shape = NM_ALLOC_N(size_t, ns->dim); for (size_t i = 0; i < ns->dim; ++i) { ns->offset[i] = slice->coords[i] + s->offset[i]; ns->shape[i] = slice->lengths[i]; } ns->stride = s->stride; ns->elements = s->elements; s->src->count++; ns->src = s->src; nm_dense_storage_unregister(s); return ns; } } /* * Set a value or values in a dense matrix. Requires that right be either a single value or an NMatrix (ref or real). */ void nm_dense_storage_set(VALUE left, SLICE* slice, VALUE right) { NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::set, void, VALUE, SLICE*, VALUE) nm::dtype_t dtype = NM_DTYPE(left); ttable[dtype](left, slice, right); } /////////// // Tests // /////////// /* * Do these two dense matrices have the same contents? * * TODO: Test the shape of the two matrices. * TODO: See if using memcmp is faster when the left- and right-hand matrices * have the same dtype. */ bool nm_dense_storage_eqeq(const STORAGE* left, const STORAGE* right) { LR_DTYPE_TEMPLATE_TABLE(nm::dense_storage::eqeq, bool, const DENSE_STORAGE*, const DENSE_STORAGE*) if (!ttable[left->dtype][right->dtype]) { rb_raise(nm_eDataTypeError, "comparison between these dtypes is undefined"); return false; } return ttable[left->dtype][right->dtype]((const DENSE_STORAGE*)left, (const DENSE_STORAGE*)right); } /* * Test to see if the matrix is Hermitian. If the matrix does not have a * dtype of Complex64 or Complex128 this is the same as testing for symmetry. 
*/ bool nm_dense_storage_is_hermitian(const DENSE_STORAGE* mat, int lda) { if (mat->dtype == nm::COMPLEX64) { return nm::dense_storage::is_hermitian(mat, lda); } else if (mat->dtype == nm::COMPLEX128) { return nm::dense_storage::is_hermitian(mat, lda); } else { return nm_dense_storage_is_symmetric(mat, lda); } } /* * Is this dense matrix symmetric about the diagonal? */ bool nm_dense_storage_is_symmetric(const DENSE_STORAGE* mat, int lda) { DTYPE_TEMPLATE_TABLE(nm::dense_storage::is_symmetric, bool, const DENSE_STORAGE*, int); return ttable[mat->dtype](mat, lda); } ////////// // Math // ////////// /* * Dense matrix-matrix multiplication. */ STORAGE* nm_dense_storage_matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector) { DTYPE_TEMPLATE_TABLE(nm::dense_storage::matrix_multiply, DENSE_STORAGE*, const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector); return ttable[casted_storage.left->dtype](casted_storage, resulting_shape, vector); } ///////////// // Utility // ///////////// /* * Determine the linear array position (in elements of s) of some set of coordinates * (given by slice). */ size_t nm_dense_storage_pos(const DENSE_STORAGE* s, const size_t* coords) { size_t pos = 0; for (size_t i = 0; i < s->dim; ++i) pos += (coords[i] + s->offset[i]) * s->stride[i]; return pos; } /* * Determine the a set of slice coordinates from linear array position (in elements * of s) of some set of coordinates (given by slice). (Inverse of * nm_dense_storage_pos). * * The parameter coords_out should be a pre-allocated array of size equal to s->dim. */ void nm_dense_storage_coords(const DENSE_STORAGE* s, const size_t slice_pos, size_t* coords_out) { size_t temp_pos = slice_pos; for (size_t i = 0; i < s->dim; ++i) { coords_out[i] = (temp_pos - temp_pos % s->stride[i])/s->stride[i] - s->offset[i]; temp_pos = temp_pos % s->stride[i]; } } /* * Calculate the stride length. 
*/ static size_t* stride(size_t* shape, size_t dim) { size_t i, j; size_t* stride = NM_ALLOC_N(size_t, dim); for (i = 0; i < dim; ++i) { stride[i] = 1; for (j = i+1; j < dim; ++j) { stride[i] *= shape[j]; } } return stride; } ///////////////////////// // Copying and Casting // ///////////////////////// /* * Copy dense storage, changing dtype if necessary. */ STORAGE* nm_dense_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype, void* dummy) { NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::cast_copy, DENSE_STORAGE*, const DENSE_STORAGE* rhs, nm::dtype_t new_dtype); if (!ttable[new_dtype][rhs->dtype]) { rb_raise(nm_eDataTypeError, "cast between these dtypes is undefined"); return NULL; } return (STORAGE*)ttable[new_dtype][rhs->dtype]((DENSE_STORAGE*)rhs, new_dtype); } /* * Copy dense storage without a change in dtype. */ DENSE_STORAGE* nm_dense_storage_copy(const DENSE_STORAGE* rhs) { nm_dense_storage_register(rhs); size_t count = 0; size_t *shape = NM_ALLOC_N(size_t, rhs->dim); // copy shape and offset for (size_t i = 0; i < rhs->dim; ++i) { shape[i] = rhs->shape[i]; } DENSE_STORAGE* lhs = nm_dense_storage_create(rhs->dtype, shape, rhs->dim, NULL, 0); count = nm_storage_count_max_elements(lhs); // Ensure that allocation worked before copying. if (lhs && count) { if (rhs == rhs->src) // not a reference memcpy(lhs->elements, rhs->elements, DTYPE_SIZES[rhs->dtype] * count); else { // slice whole matrix nm_dense_storage_register(lhs); size_t *offset = NM_ALLOC_N(size_t, rhs->dim); memset(offset, 0, sizeof(size_t) * rhs->dim); slice_copy(lhs, reinterpret_cast(rhs->src), rhs->shape, 0, nm_dense_storage_pos(rhs, offset), 0); nm_dense_storage_unregister(lhs); } } nm_dense_storage_unregister(rhs); return lhs; } /* * Transpose dense storage into a new dense storage object. Basically a copy constructor. * * Not much point in templating this as it's pretty straight-forward. 
*/ STORAGE* nm_dense_storage_copy_transposed(const STORAGE* rhs_base) { DENSE_STORAGE* rhs = (DENSE_STORAGE*)rhs_base; nm_dense_storage_register(rhs); size_t *shape = NM_ALLOC_N(size_t, rhs->dim); // swap shape shape[0] = rhs->shape[1]; shape[1] = rhs->shape[0]; DENSE_STORAGE *lhs = nm_dense_storage_create(rhs->dtype, shape, rhs->dim, NULL, 0); nm_dense_storage_register(lhs); if (rhs_base->src == rhs_base) { nm_math_transpose_generic(rhs->shape[0], rhs->shape[1], rhs->elements, rhs->shape[1], lhs->elements, lhs->shape[1], DTYPE_SIZES[rhs->dtype]); } else { NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::ref_slice_copy_transposed, void, const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs); if (!ttable[lhs->dtype][rhs->dtype]) { nm_dense_storage_unregister(rhs); nm_dense_storage_unregister(lhs); rb_raise(nm_eDataTypeError, "transposition between these dtypes is undefined"); } ttable[lhs->dtype][rhs->dtype](rhs, lhs); } nm_dense_storage_unregister(rhs); nm_dense_storage_unregister(lhs); return (STORAGE*)lhs; } } // end of extern "C" block namespace nm { /* * Used for slice setting. Takes the right-hand of the equal sign, a single VALUE, and massages * it into the correct form if it's not already there (dtype, non-ref, dense). Returns a pair of the NMATRIX* and a * boolean. If the boolean is true, the calling function is responsible for calling nm_delete on the NMATRIX*. * Otherwise, the NMATRIX* still belongs to Ruby and Ruby will free it. 
 */
// NOTE(review): template argument lists appear to have been lost throughout
// this region (e.g. this should presumably read std::pair<NMATRIX*,bool>,
// and the reinterpret_casts below lack their target types) -- confirm
// against the original source before compiling.
std::pair interpret_arg_as_dense_nmatrix(VALUE right, nm::dtype_t dtype) {
  NM_CONSERVATIVE(nm_register_value(&right));
  if (IsNMatrixType(right)) {
    NMATRIX *r;
    if (NM_STYPE(right) != DENSE_STORE || NM_DTYPE(right) != dtype || NM_SRC(right) != NM_STORAGE(right)) {
      // Wrong storage type, wrong dtype, or a reference: cast to a fresh
      // dense matrix of the requested dtype. Caller must nm_delete it.
      UnwrapNMatrix( right, r );
      NMATRIX* ldtype_r = nm_cast_with_ctype_args(r, nm::DENSE_STORE, dtype, NULL);
      NM_CONSERVATIVE(nm_unregister_value(&right));
      return std::make_pair(ldtype_r,true);
    } else { // simple case -- right-hand matrix is dense and is not a reference and has same dtype
      UnwrapNMatrix( right, r );
      NM_CONSERVATIVE(nm_unregister_value(&right));
      return std::make_pair(r, false);
    }
    // Do not set v_alloc = true for either of these. It is the responsibility of r/ldtype_r
  } else if (RB_TYPE_P(right, T_DATA)) {
    // Some other wrapped C object -- we don't know what to do with it.
    NM_CONSERVATIVE(nm_unregister_value(&right));
    rb_raise(rb_eTypeError, "unrecognized type for slice assignment");
  }

  NM_CONSERVATIVE(nm_unregister_value(&right));
  return std::make_pair(NULL, false);
}

namespace dense_storage {

/////////////////////////
// Templated Functions //
/////////////////////////

/*
 * Copy a reference (slice) into lhs, transposed. Walks lhs's coordinates
 * backwards, swaps the first two coordinates, and reads the corresponding
 * element out of rhs (honoring rhs's slice offsets via nm_dense_storage_pos).
 */
template
void ref_slice_copy_transposed(const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs) {

  nm_dense_storage_register(rhs);
  nm_dense_storage_register(lhs);

  LDType* lhs_els = reinterpret_cast(lhs->elements);
  RDType* rhs_els = reinterpret_cast(rhs->elements);

  size_t count = nm_storage_count_max_elements(lhs);;
  size_t* temp_coords = NM_ALLOCA_N(size_t, lhs->dim);
  size_t coord_swap_temp;

  while (count-- > 0) {
    nm_dense_storage_coords(lhs, count, temp_coords);
    NM_SWAP(temp_coords[0], temp_coords[1], coord_swap_temp);
    size_t r_coord = nm_dense_storage_pos(rhs, temp_coords);
    lhs_els[count] = rhs_els[r_coord];
  }

  nm_dense_storage_unregister(rhs);
  nm_dense_storage_unregister(lhs);
}

/*
 * Copy rhs into a newly created storage of new_dtype, converting each
 * element. References are materialized via slice_copy.
 */
template
DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, dtype_t new_dtype) {
  nm_dense_storage_register(rhs);

  size_t  count = nm_storage_count_max_elements(rhs);

  size_t *shape = NM_ALLOC_N(size_t, rhs->dim);
  memcpy(shape, rhs->shape, sizeof(size_t) * rhs->dim);

  DENSE_STORAGE* lhs = nm_dense_storage_create(new_dtype, shape, rhs->dim, NULL, 0);

  nm_dense_storage_register(lhs);

  // Ensure that allocation worked before copying.
  if (lhs && count) {
    if (rhs->src != rhs) { // Make a copy of a ref to a matrix.
      size_t* offset = NM_ALLOCA_N(size_t, rhs->dim);
      memset(offset, 0, sizeof(size_t) * rhs->dim);

      slice_copy(lhs, reinterpret_cast(rhs->src), rhs->shape, 0, nm_dense_storage_pos(rhs, offset), 0);
    } else { // Make a regular copy.
      RDType* rhs_els = reinterpret_cast(rhs->elements);
      LDType* lhs_els = reinterpret_cast(lhs->elements);

      for (size_t i = 0; i < count; ++i)
        lhs_els[i] = rhs_els[i];
    }
  }

  nm_dense_storage_unregister(rhs);
  nm_dense_storage_unregister(lhs);

  return lhs;
}

/*
 * Element-wise equality between two dense storages. References are first
 * materialized into temporary contiguous copies so elements can be compared
 * by linear index.
 *
 * NOTE(review): no shape check is performed here; if the two storages have
 * different element counts, the loop below indexes right_elements with
 * left's count -- presumably callers guarantee equal shapes; confirm.
 */
template
bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right) {
  nm_dense_storage_register(left);
  nm_dense_storage_register(right);

  size_t index;
  DENSE_STORAGE *tmp1, *tmp2;
  tmp1 = NULL; tmp2 = NULL;
  bool result = true;
  /* FIXME: Very strange behavior! The GC calls the method directly with non-initialized data. */
  LDType* left_elements = (LDType*)left->elements;
  RDType* right_elements = (RDType*)right->elements;

  // Copy elements in temp matrix if you have reference to the right.
  if (left->src != left) {
    tmp1 = nm_dense_storage_copy(left);
    nm_dense_storage_register(tmp1);
    left_elements = (LDType*)tmp1->elements;
  }
  if (right->src != right) {
    tmp2 = nm_dense_storage_copy(right);
    nm_dense_storage_register(tmp2);
    right_elements = (RDType*)tmp2->elements;
  }

  for (index = nm_storage_count_max_elements(left); index-- > 0;) {
    if (left_elements[index] != right_elements[index]) {
      result = false;
      break;
    }
  }

  if (tmp1) {
    nm_dense_storage_unregister(tmp1);
    // NOTE(review): NM_FREE releases only the struct; nm_dense_storage_copy
    // also allocated shape/offset/elements, which look leaked here --
    // consider nm_dense_storage_delete instead. TODO confirm.
    NM_FREE(tmp1);
  }
  if (tmp2) {
    nm_dense_storage_unregister(tmp2);
    // NOTE(review): same apparent leak as tmp1 above.
    NM_FREE(tmp2);
  }

  nm_dense_storage_unregister(left);
  nm_dense_storage_unregister(right);

  return result;
}

/*
 * Hermitian test for a complex DType: every off-diagonal element must equal
 * the complex conjugate of its transpose-mate. Reads the .i (imaginary)
 * member, so DType must be a complex type.
 */
template
bool is_hermitian(const DENSE_STORAGE* mat, int lda) {
  unsigned int i, j;
  DType complex_conj;

  const DType* els = (DType*) mat->elements;

  for (i = mat->shape[0]; i-- > 0;) {
    for (j = i + 1; j < mat->shape[1]; ++j) {
      complex_conj = els[j*lda + i];
      complex_conj.i = -complex_conj.i;

      if (els[i*lda+j] != complex_conj) {
        return false;
      }
    }
  }

  return true;
}

/*
 * Symmetry test: every element above the diagonal must equal its
 * transpose-mate below the diagonal.
 */
template
bool is_symmetric(const DENSE_STORAGE* mat, int lda) {
  unsigned int i, j;
  const DType* els = (DType*) mat->elements;

  for (i = mat->shape[0]; i-- > 0;) {
    for (j = i + 1; j < mat->shape[1]; ++j) {
      if (els[i*lda+j] != els[j*lda+i]) {
        return false;
      }
    }
  }

  return true;
}

/*
 * DType-templated matrix-matrix multiplication for dense storage.
 * Uses gemv when the right operand is a vector, gemm otherwise, with
 * alpha = 1 and beta = 0 (plain product, no accumulation).
 */
template
static DENSE_STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector) {
  DENSE_STORAGE *left = (DENSE_STORAGE*)(casted_storage.left),
                *right = (DENSE_STORAGE*)(casted_storage.right);

  nm_dense_storage_register(left);
  nm_dense_storage_register(right);

  // Create result storage.
  DENSE_STORAGE* result = nm_dense_storage_create(left->dtype, resulting_shape, 2, NULL, 0);

  nm_dense_storage_register(result);

  DType *pAlpha = NM_ALLOCA_N(DType, 1),
        *pBeta = NM_ALLOCA_N(DType, 1);
  *pAlpha = 1;
  *pBeta = 0;
  // Do the multiplication
  if (vector)
    nm::math::gemv(CblasNoTrans, left->shape[0], left->shape[1], pAlpha, reinterpret_cast(left->elements), left->shape[1], reinterpret_cast(right->elements), 1, pBeta, reinterpret_cast(result->elements), 1);
  else
    nm::math::gemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, left->shape[0], right->shape[1], left->shape[1], pAlpha, reinterpret_cast(left->elements), left->shape[1], reinterpret_cast(right->elements), right->shape[1], pBeta, reinterpret_cast(result->elements), result->shape[1]);

  nm_dense_storage_unregister(left);
  nm_dense_storage_unregister(right);
  nm_dense_storage_unregister(result);

  return result;
}

}} // end of namespace nm::dense_storage



================================================
FILE: ext/nmatrix/storage/dense/dense.h
================================================
/////////////////////////////////////////////////////////////////////
// = NMatrix
//
// A linear algebra library for scientific computation in Ruby.
// NMatrix is part of SciRuby.
//
// NMatrix was originally inspired by and derived from NArray, by
// Masahiro Tanaka: http://narray.rubyforge.org
//
// == Copyright Information
//
// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
//
// Please see LICENSE.txt for additional copyright notices.
//
// == Contributing
//
// By contributing source code to SciRuby, you agree to be bound by
// our Contributor Agreement:
//
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
//
// == dense.h
//
// Dense n-dimensional matrix storage.
#ifndef DENSE_H #define DENSE_H /* * Standard Includes */ #include #include /* * Project Includes */ #include "types.h" //#include "util/math.h" #include "data/data.h" #include "../common.h" #include "nmatrix.h" /* * Macros */ /* * Types */ /* * Data */ extern "C" { /* * Functions */ /////////////// // Lifecycle // /////////////// DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t dim, void* elements, size_t elements_length); void nm_dense_storage_delete(STORAGE* s); void nm_dense_storage_delete_ref(STORAGE* s); void nm_dense_storage_mark(STORAGE*); void nm_dense_storage_register(const STORAGE* s); void nm_dense_storage_unregister(const STORAGE* s); /////////////// // Accessors // /////////////// VALUE nm_dense_map_pair(VALUE self, VALUE right); VALUE nm_dense_map(VALUE self); VALUE nm_dense_each(VALUE nmatrix); VALUE nm_dense_each_with_indices(VALUE nmatrix); void* nm_dense_storage_get(const STORAGE* s, SLICE* slice); void* nm_dense_storage_ref(const STORAGE* s, SLICE* slice); void nm_dense_storage_set(VALUE left, SLICE* slice, VALUE right); /////////// // Tests // /////////// bool nm_dense_storage_eqeq(const STORAGE* left, const STORAGE* right); bool nm_dense_storage_is_symmetric(const DENSE_STORAGE* mat, int lda); bool nm_dense_storage_is_hermitian(const DENSE_STORAGE* mat, int lda); ////////// // Math // ////////// STORAGE* nm_dense_storage_matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector); ///////////// // Utility // ///////////// size_t nm_dense_storage_pos(const DENSE_STORAGE* s, const size_t* coords); void nm_dense_storage_coords(const DENSE_STORAGE* s, const size_t slice_pos, size_t* coords_out); ///////////////////////// // Copying and Casting // ///////////////////////// DENSE_STORAGE* nm_dense_storage_copy(const DENSE_STORAGE* rhs); STORAGE* nm_dense_storage_copy_transposed(const STORAGE* rhs_base); STORAGE* nm_dense_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype, void*); 
} // end of extern "C" block

namespace nm {
  std::pair interpret_arg_as_dense_nmatrix(VALUE right, nm::dtype_t dtype);
} // end of namespace nm

#endif // DENSE_H



================================================
FILE: ext/nmatrix/storage/list/list.cpp
================================================
/////////////////////////////////////////////////////////////////////
// = NMatrix
//
// A linear algebra library for scientific computation in Ruby.
// NMatrix is part of SciRuby.
//
// NMatrix was originally inspired by and derived from NArray, by
// Masahiro Tanaka: http://narray.rubyforge.org
//
// == Copyright Information
//
// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
//
// Please see LICENSE.txt for additional copyright notices.
//
// == Contributing
//
// By contributing source code to SciRuby, you agree to be bound by
// our Contributor Agreement:
//
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
//
// == list.c
//
// List-of-lists n-dimensional matrix storage. Uses singly-linked
// lists.

/*
 * Standard Includes
 */

// NOTE(review): the standard include targets below appear to have been lost
// in extraction (header names missing) -- restore before compiling.
#include
#include // std::min
#include
#include
#include

/*
 * Project Includes
 */

#include "../../types.h"
#include "../../data/data.h"
#include "../dense/dense.h"

#include "../common.h"
#include "list.h"

#include "../../math/math.h"
#include "../../util/sl_list.h"

/*
 * Macros
 */

/*
 * Global Variables
 */

extern "C" {
  // Helpers implemented later in this file.
  static void slice_set_single(LIST_STORAGE* dest, LIST* l, void* val, size_t* coords, size_t* lengths, size_t n);
  static void __nm_list_storage_unregister_temp_value_list(std::list& temp_vals);
  static void __nm_list_storage_unregister_temp_list_list(std::list& temp_vals, size_t recursions);
}

namespace nm { namespace list_storage {

/*
 * Forward Declarations
 */

/*
 * RecurseData bundles everything the recursive list-walkers need about one
 * LIST_STORAGE operand: the storage as referenced (possibly a slice), the
 * ultimate source storage it resolves to, the accumulated per-dimension
 * offsets between the two, and the default value both raw and as a Ruby
 * VALUE. Registers the storages (and the VALUE) with the GC guard list for
 * its own lifetime.
 */
class RecurseData {
public:
  // Note that providing init_obj argument does not override init.
  RecurseData(const LIST_STORAGE* s, VALUE init_obj__ = Qnil) : ref(s), actual(s), shape_(s->shape), offsets(s->dim, 0), init_(s->default_val), init_obj_(init_obj__) {
    // Chase the src chain to the real storage, accumulating slice offsets.
    while (actual->src != actual) {
      for (size_t i = 0; i < s->dim; ++i) // update offsets as we recurse
        offsets[i] += actual->offset[i];
      actual = reinterpret_cast(actual->src);
    }

    nm_list_storage_register(actual);
    nm_list_storage_register(ref);

    actual_shape_ = actual->shape;

    if (init_obj_ == Qnil) {
      init_obj_ = s->dtype == nm::RUBYOBJ ? *reinterpret_cast(s->default_val) : nm::rubyobj_from_cval(s->default_val, s->dtype).rval;
    }
    nm_register_value(&init_obj_);
  }

  ~RecurseData() {
    nm_unregister_value(&init_obj_);
    nm_list_storage_unregister(ref);
    nm_list_storage_unregister(actual);
  }

  dtype_t dtype() const { return ref->dtype; }

  size_t dim() const { return ref->dim; }

  // Extent of the referenced view at recursion depth rec (rec counts up
  // from the innermost dimension).
  size_t ref_shape(size_t rec) const {
    return shape_[ref->dim - rec - 1];
  }

  // Freshly allocated copy of the view's shape; caller owns it.
  size_t* copy_alloc_shape() const {
    size_t* new_shape = NM_ALLOC_N(size_t, ref->dim);
    memcpy(new_shape, shape_, sizeof(size_t)*ref->dim);
    return new_shape;
  }

  size_t actual_shape(size_t rec) const {
    return actual_shape_[actual->dim - rec - 1];
  }

  // Accumulated slice offset at recursion depth rec.
  size_t offset(size_t rec) const {
    return offsets[ref->dim - rec - 1];
  }

  void* init() const { return init_; }

  VALUE init_obj() const { return init_obj_; }

  LIST* top_level_list() const {
    return reinterpret_cast(actual->rows);
  }

  const LIST_STORAGE* ref;
  const LIST_STORAGE* actual;

  size_t* shape_; // of ref
  size_t* actual_shape_;
protected:
  std::vector offsets; // relative to actual
  void* init_;
  VALUE init_obj_;
};

template
static LIST_STORAGE* cast_copy(const LIST_STORAGE* rhs, nm::dtype_t new_dtype);

template
static bool eqeq_r(RecurseData& left, RecurseData& right, const LIST* l, const LIST* r, size_t rec);

template
static bool eqeq_empty_r(RecurseData& s, const LIST* l, size_t rec, const TDType* t_init);

/*
 * Recursive helper for map_merged_stored_r which handles the case where one list is empty and the other is not.
 */
*/ static void map_empty_stored_r(RecurseData& result, RecurseData& s, LIST* x, const LIST* l, size_t rec, bool rev, const VALUE& t_init) { if (s.dtype() == nm::RUBYOBJ) { nm_list_storage_register_list(l, rec); } if (result.dtype() == nm::RUBYOBJ) { nm_list_storage_register_list(x, rec); } NODE *curr = l->first, *xcurr = NULL; // For reference matrices, make sure we start in the correct place. size_t offset = s.offset(rec); size_t x_shape = s.ref_shape(rec); while (curr && curr->key < offset) { curr = curr->next; } if (curr && curr->key - offset >= x_shape) curr = NULL; if (rec) { std::list temp_vals; while (curr) { LIST* val = nm::list::create(); map_empty_stored_r(result, s, val, reinterpret_cast(curr->val), rec-1, rev, t_init); if (!val->first) nm::list::del(val, 0); else { nm_list_storage_register_list(val, rec-1); temp_vals.push_front(val); nm::list::insert_helper(x, xcurr, curr->key - offset, val); } curr = curr->next; if (curr && curr->key - offset >= x_shape) curr = NULL; } __nm_list_storage_unregister_temp_list_list(temp_vals, rec-1); } else { std::list temp_vals; while (curr) { VALUE val, s_val; if (s.dtype() == nm::RUBYOBJ) s_val = (*reinterpret_cast(curr->val)).rval; else s_val = nm::rubyobj_from_cval(curr->val, s.dtype()).rval; if (rev) val = rb_yield_values(2, t_init, s_val); else val = rb_yield_values(2, s_val, t_init); nm_register_value(&val); if (rb_funcall(val, rb_intern("!="), 1, result.init_obj()) == Qtrue) { xcurr = nm::list::insert_helper(x, xcurr, curr->key - offset, val); temp_vals.push_front(reinterpret_cast(xcurr->val)); nm_register_value(&*reinterpret_cast(xcurr->val)); } nm_unregister_value(&val); curr = curr->next; if (curr && curr->key - offset >= x_shape) curr = NULL; } __nm_list_storage_unregister_temp_value_list(temp_vals); } if (s.dtype() == nm::RUBYOBJ){ nm_list_storage_unregister_list(l, rec); } if (result.dtype() == nm::RUBYOBJ) { nm_list_storage_unregister_list(x, rec); } } /* * Recursive helper function for nm_list_map_stored 
// Walks left's stored nodes at this recursion level, yields each element to
// the block, and inserts non-default results into x.
static void map_stored_r(RecurseData& result, RecurseData& left, LIST* x, const LIST* l, size_t rec) {
  if (left.dtype() == nm::RUBYOBJ) {
    nm_list_storage_register_list(l, rec);
  }
  if (result.dtype() == nm::RUBYOBJ) {
    nm_list_storage_register_list(x, rec);
  }
  NODE *lcurr = l->first,
       *xcurr = x->first;

  // For reference matrices, make sure we start in the correct place.
  while (lcurr && lcurr->key < left.offset(rec)) { lcurr = lcurr->next; }
  if (lcurr && lcurr->key - left.offset(rec) >= result.ref_shape(rec)) lcurr = NULL;

  if (rec) {
    // Inner level: recurse into each child list.
    std::list temp_vals;
    while (lcurr) {
      size_t key;
      LIST* val = nm::list::create();

      map_stored_r(result, left, val, reinterpret_cast(lcurr->val), rec-1);

      key = lcurr->key - left.offset(rec);
      lcurr = lcurr->next;

      if (!val->first) nm::list::del(val, 0); // empty list -- don't insert
      else {
        nm_list_storage_register_list(val, rec-1);
        temp_vals.push_front(val);
        xcurr = nm::list::insert_helper(x, xcurr, key, val);
      }
      if (lcurr && lcurr->key - left.offset(rec) >= result.ref_shape(rec)) lcurr = NULL;
    }
    __nm_list_storage_unregister_temp_list_list(temp_vals, rec-1);
  } else {
    // Leaf level: yield each stored element to the block.
    std::list temp_vals;
    while (lcurr) {
      size_t key;
      VALUE val;

      val = rb_yield_values(1, left.dtype() == nm::RUBYOBJ ? *reinterpret_cast(lcurr->val) : nm::rubyobj_from_cval(lcurr->val, left.dtype()).rval);
      key = lcurr->key - left.offset(rec);
      lcurr = lcurr->next;

      // Only non-default results are actually stored.
      if (!rb_equal(val, result.init_obj())) {
        xcurr = nm::list::insert_helper(x, xcurr, key, val);
        temp_vals.push_front(reinterpret_cast(xcurr->val));
        nm_register_value(&*reinterpret_cast(xcurr->val));
      }
      if (lcurr && lcurr->key - left.offset(rec) >= result.ref_shape(rec)) lcurr = NULL;
    }
    __nm_list_storage_unregister_temp_value_list(temp_vals);
  }

  if (left.dtype() == nm::RUBYOBJ) {
    nm_list_storage_unregister_list(l, rec);
  }
  if (result.dtype() == nm::RUBYOBJ) {
    nm_list_storage_unregister_list(x, rec);
  }
}

/*
 * Recursive helper function for nm_list_map_merged_stored.
 *
 * Merges the stored nodes of l and r key-by-key (like a sorted-list merge):
 * where only one side has a node, map_empty_stored_r pairs it with the other
 * side's default value; where both sides have the key, both elements are
 * yielded together. Non-default results are inserted into x.
 */
static void map_merged_stored_r(RecurseData& result, RecurseData& left, RecurseData& right, LIST* x, const LIST* l, const LIST* r, size_t rec) {
  if (left.dtype() == nm::RUBYOBJ) {
    nm_list_storage_register_list(l, rec);
  }
  if (right.dtype() == nm::RUBYOBJ) {
    nm_list_storage_register_list(r, rec);
  }
  if (result.dtype() == nm::RUBYOBJ) {
    nm_list_storage_register_list(x, rec);
  }

  NODE *lcurr = l->first,
       *rcurr = r->first,
       *xcurr = x->first;

  // For reference matrices, make sure we start in the correct place.
  while (lcurr && lcurr->key < left.offset(rec)) { lcurr = lcurr->next; }
  while (rcurr && rcurr->key < right.offset(rec)) { rcurr = rcurr->next; }
  if (rcurr && rcurr->key - right.offset(rec) >= result.ref_shape(rec)) rcurr = NULL;
  if (lcurr && lcurr->key - left.offset(rec) >= result.ref_shape(rec)) lcurr = NULL;

  if (rec) {
    // Inner level: merge child lists, recursing into whichever side(s) have
    // a node at the smaller (offset-adjusted) key.
    std::list temp_vals;
    while (lcurr || rcurr) {
      size_t key;
      LIST* val = nm::list::create();

      if (!rcurr || (lcurr && (lcurr->key - left.offset(rec) < rcurr->key - right.offset(rec)))) {
        // Left only.
        map_empty_stored_r(result, left, val, reinterpret_cast(lcurr->val), rec-1, false, right.init_obj());
        key = lcurr->key - left.offset(rec);
        lcurr = lcurr->next;
      } else if (!lcurr || (rcurr && (rcurr->key - right.offset(rec) < lcurr->key - left.offset(rec)))) {
        // Right only.
        map_empty_stored_r(result, right, val, reinterpret_cast(rcurr->val), rec-1, true, left.init_obj());
        key = rcurr->key - right.offset(rec);
        rcurr = rcurr->next;
      } else { // == and both present
        map_merged_stored_r(result, left, right, val, reinterpret_cast(lcurr->val), reinterpret_cast(rcurr->val), rec-1);
        key = lcurr->key - left.offset(rec);
        lcurr = lcurr->next;
        rcurr = rcurr->next;
      }

      if (!val->first) nm::list::del(val, 0); // empty list -- don't insert
      else {
        nm_list_storage_register_list(val, rec-1);
        temp_vals.push_front(val);
        xcurr = nm::list::insert_helper(x, xcurr, key, val);
      }

      if (rcurr && rcurr->key - right.offset(rec) >= result.ref_shape(rec)) rcurr = NULL;
      if (lcurr && lcurr->key - left.offset(rec) >= result.ref_shape(rec)) lcurr = NULL;
    }
    __nm_list_storage_unregister_temp_list_list(temp_vals, rec-1);
  } else {
    // Leaf level: yield element pairs (filling in the missing side's
    // default) and keep only non-default results.
    std::list temp_vals;
    while (lcurr || rcurr) {
      size_t key;
      VALUE val;

      if (!rcurr || (lcurr && (lcurr->key - left.offset(rec) < rcurr->key - right.offset(rec)))) {
        // Left only: pair with right's default.
        val = rb_yield_values(2, nm::rubyobj_from_cval(lcurr->val, left.dtype()).rval, right.init_obj());
        key = lcurr->key - left.offset(rec);
        lcurr = lcurr->next;
      } else if (!lcurr || (rcurr && (rcurr->key - right.offset(rec) < lcurr->key - left.offset(rec)))) {
        // Right only: pair with left's default.
        val = rb_yield_values(2, left.init_obj(), nm::rubyobj_from_cval(rcurr->val, right.dtype()).rval);
        key = rcurr->key - right.offset(rec);
        rcurr = rcurr->next;
      } else { // == and both present
        val = rb_yield_values(2, nm::rubyobj_from_cval(lcurr->val, left.dtype()).rval, nm::rubyobj_from_cval(rcurr->val, right.dtype()).rval);
        key = lcurr->key - left.offset(rec);
        lcurr = lcurr->next;
        rcurr = rcurr->next;
      }

      nm_register_value(&val);

      if (rb_funcall(val, rb_intern("!="), 1, result.init_obj()) == Qtrue) {
        xcurr = nm::list::insert_helper(x, xcurr, key, val);
        temp_vals.push_front(reinterpret_cast(xcurr->val));
        nm_register_value(&*reinterpret_cast(xcurr->val));
      }

      nm_unregister_value(&val);

      if (rcurr && rcurr->key - right.offset(rec) >= result.ref_shape(rec)) rcurr = NULL;
      if (lcurr && lcurr->key - left.offset(rec) >= result.ref_shape(rec)) lcurr = NULL;
    }
    __nm_list_storage_unregister_temp_value_list(temp_vals);
  }

  if (left.dtype() == nm::RUBYOBJ) {
    nm_list_storage_unregister_list(l, rec);
  }
  if (right.dtype() == nm::RUBYOBJ) {
    nm_list_storage_unregister_list(r, rec);
  }
  if (result.dtype() == nm::RUBYOBJ) {
    nm_list_storage_unregister_list(x, rec);
  }
}

/*
 * Recursive function, sets multiple values in a matrix from multiple source values. Also handles removal; returns true
 * if the recursion results in an empty list at that level (which signals that the current parent should be removed).
 */
// Writes the (cyclically repeated) source values v[0..v_size-1] into the
// slice of dest described by coords/lengths, recursing one dimension per
// call (n is the current dimension). Values equal to the storage's default
// are removed rather than stored; returns true when this level's list ends
// up empty, so the caller can prune the parent node.
template
static bool slice_set(LIST_STORAGE* dest, LIST* l, size_t* coords, size_t* lengths, size_t n, D* v, size_t v_size, size_t& v_offset) {
  using nm::list::node_is_within_slice;
  using nm::list::remove_by_node;
  using nm::list::find_preceding_from_list;
  using nm::list::insert_first_list;
  using nm::list::insert_first_node;
  using nm::list::insert_after;
  size_t* offsets = dest->offset;

  nm_list_storage_register(dest);
  if (dest->dtype == nm::RUBYOBJ) {
    nm_register_values(reinterpret_cast(v), v_size);
    nm_list_storage_register_list(l, dest->dim - n - 1);
  }

  // drill down into the structure
  NODE* prev = find_preceding_from_list(l, coords[n] + offsets[n]);
  NODE* node = NULL;
  if (prev) node = prev->next && node_is_within_slice(prev->next, coords[n] + offsets[n], lengths[n]) ? prev->next : NULL;
  else node = node_is_within_slice(l->first, coords[n] + offsets[n], lengths[n]) ? l->first : NULL;

  if (dest->dim - n > 1) {
    // Outer dimension: each node holds a child list.
    size_t i = 0;
    size_t key = i + offsets[n] + coords[n];

    // Make sure we have an element to work with
    if (!node) {
      if (!prev) {
        node = insert_first_list(l, key, nm::list::create());
      } else {
        node = insert_after(prev, key, nm::list::create());
      }
    }

    // At this point, it's guaranteed that there is a list here matching key.
    std::list temp_lists;
    while (node) {
      // Recurse down into the list. If it returns true, it's empty, so we need to delete it.
      bool remove_parent = slice_set(dest, reinterpret_cast(node->val), coords, lengths, n+1, v, v_size, v_offset);
      if (dest->dtype == nm::RUBYOBJ) {
        temp_lists.push_front(reinterpret_cast(node->val));
        nm_list_storage_register_list(reinterpret_cast(node->val), dest->dim - n - 2);
      }
      if (remove_parent) {
        NM_FREE(remove_by_node(l, prev, node));
        if (prev) node = prev->next ? prev->next : NULL;
        else node = l->first ? l->first : NULL;
      } else { // move forward
        prev = node;
        node = node_is_within_slice(prev->next, key-i, lengths[n]) ? prev->next : NULL;
      }

      ++i;
      ++key;

      if (i >= lengths[n]) break;

      // Now do we need to insert another node here? Or is there already one?
      if (!node) {
        if (!prev) {
          node = insert_first_list(l, key, nm::list::create());
        } else {
          node = insert_after(prev, key, nm::list::create());
        }
      }
    }
    __nm_list_storage_unregister_temp_list_list(temp_lists, dest->dim - n - 2);
  } else {
    // Innermost dimension: nodes hold elements directly.
    size_t i = 0;
    size_t key = i + offsets[n] + coords[n];
    std::list temp_vals;

    while (i < lengths[n]) {
      // Make sure we have an element to work with: v is consumed cyclically.
      if (v_offset >= v_size) v_offset %= v_size;

      if (node) {
        if (node->key == key) {
          // NOTE(review): unlike the insert branches below, neither the
          // remove branch nor the edit-in-place branch advances v_offset,
          // so the same source value is reused -- confirm this is intended.
          if (v[v_offset] == *reinterpret_cast(dest->default_val)) { // remove zero value
            NM_FREE(remove_by_node(l, (prev ? prev : l->first), node));
            if (prev) node = prev->next ? prev->next : NULL;
            else node = l->first ? l->first : NULL;
          } else { // edit directly
            *reinterpret_cast(node->val) = v[v_offset];
            prev = node;
            node = node->next ? node->next : NULL;
          }
        } else if (node->key > key) {
          // Key missing before this node: insert a new node.
          D* nv = NM_ALLOC(D);
          *nv = v[v_offset++];
          if (dest->dtype == nm::RUBYOBJ) {
            nm_register_value(&*reinterpret_cast(nv));
            temp_vals.push_front(reinterpret_cast(nv));
          }

          if (prev) node = insert_after(prev, key, nv);
          else node = insert_first_node(l, key, nv, sizeof(D));

          prev = node;
          node = prev->next ? prev->next : NULL;
        }
      } else { // no node -- insert a new one
        D* nv = NM_ALLOC(D);
        *nv = v[v_offset++];
        if (dest->dtype == nm::RUBYOBJ) {
          nm_register_value(&*reinterpret_cast(nv));
          temp_vals.push_front(reinterpret_cast(nv));
        }

        if (prev) node = insert_after(prev, key, nv);
        else node = insert_first_node(l, key, nv, sizeof(D));

        prev = node;
        node = prev->next ? prev->next : NULL;
      }

      ++i;
      ++key;
    }
    __nm_list_storage_unregister_temp_value_list(temp_vals);
  }

  if (dest->dtype == nm::RUBYOBJ) {
    nm_unregister_values(reinterpret_cast(v), v_size);
    nm_list_storage_unregister_list(l, dest->dim - n - 1);
  }
  nm_list_storage_unregister(dest);

  // An empty list at this level tells the caller to prune our parent node.
  return (l->first) ? false : true;
}

/*
 * Set a value or values in a list matrix. right may be a single Ruby value,
 * a Ruby Array, or an NMatrix (which is coerced to dense of left's dtype by
 * interpret_arg_as_dense_nmatrix).
 */
template
void set(VALUE left, SLICE* slice, VALUE right) {
  NM_CONSERVATIVE(nm_register_value(&left));
  NM_CONSERVATIVE(nm_register_value(&right));

  LIST_STORAGE* s = NM_STORAGE_LIST(left);

  std::pair nm_and_free =
    interpret_arg_as_dense_nmatrix(right, NM_DTYPE(left));

  // Map the data onto D* v.
  D* v;
  size_t v_size = 1;

  if (nm_and_free.first) {
    // right was an NMatrix: borrow its dense element buffer directly.
    DENSE_STORAGE* t = reinterpret_cast(nm_and_free.first->storage);
    v = reinterpret_cast(t->elements);
    v_size = nm_storage_count_max_elements(t);
  } else if (RB_TYPE_P(right, T_ARRAY)) {
    // right is a Ruby Array: convert each entry into a freshly allocated
    // C array of D.
    nm_register_nmatrix(nm_and_free.first);
    v_size = RARRAY_LEN(right);
    v = NM_ALLOC_N(D, v_size);
    if (NM_DTYPE(left) == nm::RUBYOBJ)
      nm_register_values(reinterpret_cast(v), v_size);

    for (size_t m = 0; m < v_size; ++m) {
      rubyval_to_cval(rb_ary_entry(right, m), s->dtype, &(v[m]));
    }
    if (NM_DTYPE(left) == nm::RUBYOBJ)
      nm_unregister_values(reinterpret_cast(v), v_size);

  } else {
    // Single Ruby value.
    nm_register_nmatrix(nm_and_free.first);
    v = reinterpret_cast(rubyobj_to_cval(right, NM_DTYPE(left)));
  }

  if (v_size == 1 && *v == *reinterpret_cast(s->default_val)) {
    // Assigning the default value: remove any stored entries in the slice.
    if (*reinterpret_cast(nm_list_storage_get(s, slice)) != *reinterpret_cast(s->default_val)) {
      nm::list::remove_recursive(s->rows, slice->coords, s->offset, slice->lengths, 0, s->dim);
    }
  } else if (slice->single) {
    slice_set_single(s, s->rows, reinterpret_cast(v), slice->coords, slice->lengths, 0);
  } else {
    size_t v_offset = 0;
    slice_set(s, s->rows, slice->coords, slice->lengths, 0, v, v_size, v_offset);
  }

  // Only free v if it was allocated in this function.
  if (nm_and_free.first) {
    if (nm_and_free.second) {
      nm_delete(nm_and_free.first);
    }
  } else {
    NM_FREE(v);
    nm_unregister_nmatrix(nm_and_free.first);
  }
  NM_CONSERVATIVE(nm_unregister_value(&left));
  NM_CONSERVATIVE(nm_unregister_value(&right));
}

/*
 * Used only to set a default initial value.
*/ template void init_default(LIST_STORAGE* s) { s->default_val = NM_ALLOC(D); *reinterpret_cast(s->default_val) = 0; } }} // end of namespace list_storage extern "C" { /* * Functions */ //////////////// // Lifecycle // /////////////// /* * Creates a list-of-lists(-of-lists-of-lists-etc) storage framework for a * matrix. * * Note: The pointers you pass in for shape and init_val become property of our * new storage. You don't need to free them, and you shouldn't re-use them. */ LIST_STORAGE* nm_list_storage_create(nm::dtype_t dtype, size_t* shape, size_t dim, void* init_val) { LIST_STORAGE* s = NM_ALLOC( LIST_STORAGE ); s->dim = dim; s->shape = shape; s->dtype = dtype; s->offset = NM_ALLOC_N(size_t, s->dim); memset(s->offset, 0, s->dim * sizeof(size_t)); s->rows = nm::list::create(); if (init_val) s->default_val = init_val; else { DTYPE_TEMPLATE_TABLE(nm::list_storage::init_default, void, LIST_STORAGE*) ttable[dtype](s); } s->count = 1; s->src = s; return s; } /* * Destructor for list storage. */ void nm_list_storage_delete(STORAGE* s) { if (s) { LIST_STORAGE* storage = (LIST_STORAGE*)s; if (storage->count-- == 1) { nm::list::del( storage->rows, storage->dim - 1 ); NM_FREE(storage->shape); NM_FREE(storage->offset); NM_FREE(storage->default_val); NM_FREE(s); } } } /* * Destructor for a list storage reference slice. */ void nm_list_storage_delete_ref(STORAGE* s) { if (s) { LIST_STORAGE* storage = (LIST_STORAGE*)s; nm_list_storage_delete( reinterpret_cast(storage->src ) ); NM_FREE(storage->shape); NM_FREE(storage->offset); NM_FREE(s); } } /* * GC mark function for list storage. 
*/ void nm_list_storage_mark(STORAGE* storage_base) { LIST_STORAGE* storage = (LIST_STORAGE*)storage_base; if (storage && storage->dtype == nm::RUBYOBJ) { rb_gc_mark(*((VALUE*)(storage->default_val))); nm::list::mark(storage->rows, storage->dim - 1); } } static void __nm_list_storage_unregister_temp_value_list(std::list& temp_vals) { for (std::list::iterator it = temp_vals.begin(); it != temp_vals.end(); ++it) { nm_unregister_value(&**it); } } static void __nm_list_storage_unregister_temp_list_list(std::list& temp_vals, size_t recursions) { for (std::list::iterator it = temp_vals.begin(); it != temp_vals.end(); ++it) { nm_list_storage_unregister_list(*it, recursions); } } void nm_list_storage_register_node(const NODE* curr) { nm_register_value(&*reinterpret_cast(curr->val)); } void nm_list_storage_unregister_node(const NODE* curr) { nm_unregister_value(&*reinterpret_cast(curr->val)); } /** * Gets rid of all instances of a given node in the registration list. * Sometimes a node will get deleted and replaced deep in a recursion, but * further up it will still get registered. This leads to a potential read * after free during the GC marking. This function completely clears out a * node so that this won't happen. 
*/ void nm_list_storage_completely_unregister_node(const NODE* curr) { nm_completely_unregister_value(&*reinterpret_cast(curr->val)); } void nm_list_storage_register_list(const LIST* list, size_t recursions) { NODE* next; if (!list) return; NODE* curr = list->first; while (curr != NULL) { next = curr->next; if (recursions == 0) { nm_list_storage_register_node(curr); } else { nm_list_storage_register_list(reinterpret_cast(curr->val), recursions - 1); } curr = next; } } void nm_list_storage_unregister_list(const LIST* list, size_t recursions) { NODE* next; if (!list) return; NODE* curr = list->first; while (curr != NULL) { next = curr->next; if (recursions == 0) { nm_list_storage_unregister_node(curr); } else { nm_list_storage_unregister_list(reinterpret_cast(curr->val), recursions - 1); } curr = next; } } void nm_list_storage_register(const STORAGE* s) { const LIST_STORAGE* storage = reinterpret_cast(s); if (storage && storage->dtype == nm::RUBYOBJ) { nm_register_value(&*reinterpret_cast(storage->default_val)); nm_list_storage_register_list(storage->rows, storage->dim - 1); } } void nm_list_storage_unregister(const STORAGE* s) { const LIST_STORAGE* storage = reinterpret_cast(s); if (storage && storage->dtype == nm::RUBYOBJ) { nm_unregister_value(&*reinterpret_cast(storage->default_val)); nm_list_storage_unregister_list(storage->rows, storage->dim - 1); } } /////////////// // Accessors // /////////////// /* * Documentation goes here. */ static NODE* list_storage_get_single_node(LIST_STORAGE* s, SLICE* slice) { LIST* l = s->rows; NODE* n; for (size_t r = 0; r < s->dim; r++) { n = nm::list::find(l, s->offset[r] + slice->coords[r]); if (n) l = reinterpret_cast(n->val); else return NULL; } return n; } /* * Recursive helper function for each_with_indices, based on nm_list_storage_count_elements_r. * Handles empty/non-existent sublists. 
*/ static void each_empty_with_indices_r(nm::list_storage::RecurseData& s, size_t rec, VALUE& stack) { VALUE empty = s.dtype() == nm::RUBYOBJ ? *reinterpret_cast(s.init()) : s.init_obj(); NM_CONSERVATIVE(nm_register_value(&stack)); if (rec) { for (unsigned long index = 0; index < s.ref_shape(rec); ++index) { // Don't do an unshift/shift here -- we'll let that be handled in the lowest-level iteration (recursions == 0) rb_ary_push(stack, LONG2NUM(index)); each_empty_with_indices_r(s, rec-1, stack); rb_ary_pop(stack); } } else { rb_ary_unshift(stack, empty); for (unsigned long index = 0; index < s.ref_shape(rec); ++index) { rb_ary_push(stack, LONG2NUM(index)); rb_yield_splat(stack); rb_ary_pop(stack); } rb_ary_shift(stack); } NM_CONSERVATIVE(nm_unregister_value(&stack)); } /* * Recursive helper function for each_with_indices, based on nm_list_storage_count_elements_r. */ static void each_with_indices_r(nm::list_storage::RecurseData& s, const LIST* l, size_t rec, VALUE& stack) { if (s.dtype() == nm::RUBYOBJ) nm_list_storage_register_list(l, rec); NM_CONSERVATIVE(nm_register_value(&stack)); NODE* curr = l->first; size_t offset = s.offset(rec); size_t shape = s.ref_shape(rec); while (curr && curr->key < offset) curr = curr->next; if (curr && curr->key - offset >= shape) curr = NULL; if (rec) { for (unsigned long index = 0; index < shape; ++index) { // index in reference rb_ary_push(stack, LONG2NUM(index)); if (!curr || index < curr->key - offset) { each_empty_with_indices_r(s, rec-1, stack); } else { // index == curr->key - offset each_with_indices_r(s, reinterpret_cast(curr->val), rec-1, stack); curr = curr->next; } rb_ary_pop(stack); } } else { for (unsigned long index = 0; index < shape; ++index) { rb_ary_push(stack, LONG2NUM(index)); if (!curr || index < curr->key - offset) { rb_ary_unshift(stack, s.dtype() == nm::RUBYOBJ ? *reinterpret_cast(s.init()) : s.init_obj()); } else { // index == curr->key - offset rb_ary_unshift(stack, s.dtype() == nm::RUBYOBJ ? 
*reinterpret_cast(curr->val) : nm::rubyobj_from_cval(curr->val, s.dtype()).rval); curr = curr->next; } rb_yield_splat(stack); rb_ary_shift(stack); rb_ary_pop(stack); } } NM_CONSERVATIVE(nm_unregister_value(&stack)); if (s.dtype() == nm::RUBYOBJ) nm_list_storage_unregister_list(l, rec); } /* * Recursive helper function for each_stored_with_indices, based on nm_list_storage_count_elements_r. */ static void each_stored_with_indices_r(nm::list_storage::RecurseData& s, const LIST* l, size_t rec, VALUE& stack) { if (s.dtype() == nm::RUBYOBJ) nm_list_storage_register_list(l, rec); NM_CONSERVATIVE(nm_register_value(&stack)); NODE* curr = l->first; size_t offset = s.offset(rec); size_t shape = s.ref_shape(rec); while (curr && curr->key < offset) { curr = curr->next; } if (curr && curr->key - offset >= shape) curr = NULL; if (rec) { while (curr) { rb_ary_push(stack, LONG2NUM(static_cast(curr->key - offset))); each_stored_with_indices_r(s, reinterpret_cast(curr->val), rec-1, stack); rb_ary_pop(stack); curr = curr->next; if (curr && curr->key - offset >= shape) curr = NULL; } } else { while (curr) { rb_ary_push(stack, LONG2NUM(static_cast(curr->key - offset))); // add index to end // add value to beginning rb_ary_unshift(stack, s.dtype() == nm::RUBYOBJ ? *reinterpret_cast(curr->val) : nm::rubyobj_from_cval(curr->val, s.dtype()).rval); // yield to the whole stack (value, i, j, k, ...) rb_yield_splat(stack); // remove the value rb_ary_shift(stack); // remove the index from the end rb_ary_pop(stack); curr = curr->next; if (curr && curr->key - offset >= shape) curr = NULL; } } NM_CONSERVATIVE(nm_unregister_value(&stack)); if (s.dtype() == nm::RUBYOBJ) nm_list_storage_unregister_list(l, rec); } /* * Each/each-stored iterator, brings along the indices. */ VALUE nm_list_each_with_indices(VALUE nmatrix, bool stored) { NM_CONSERVATIVE(nm_register_value(&nmatrix)); // If we don't have a block, return an enumerator. 
RETURN_SIZED_ENUMERATOR_PRE NM_CONSERVATIVE(nm_unregister_value(&nmatrix)); RETURN_SIZED_ENUMERATOR(nmatrix, 0, 0, 0); nm::list_storage::RecurseData sdata(NM_STORAGE_LIST(nmatrix)); VALUE stack = rb_ary_new(); if (stored) each_stored_with_indices_r(sdata, sdata.top_level_list(), sdata.dim() - 1, stack); else each_with_indices_r(sdata, sdata.top_level_list(), sdata.dim() - 1, stack); NM_CONSERVATIVE(nm_unregister_value(&nmatrix)); return nmatrix; } /* * map merged stored iterator. Always returns a matrix containing RubyObjects * which probably needs to be casted. */ VALUE nm_list_map_stored(VALUE left, VALUE init) { NM_CONSERVATIVE(nm_register_value(&left)); NM_CONSERVATIVE(nm_register_value(&init)); LIST_STORAGE *s = NM_STORAGE_LIST(left); // For each matrix, if it's a reference, we want to deal directly with the // original (with appropriate offsetting) nm::list_storage::RecurseData sdata(s); //if (!rb_block_given_p()) { // rb_raise(rb_eNotImpError, "RETURN_SIZED_ENUMERATOR probably won't work for a map_merged since no merged object is created"); //} // If we don't have a block, return an enumerator. RETURN_SIZED_ENUMERATOR_PRE NM_CONSERVATIVE(nm_unregister_value(&left)); NM_CONSERVATIVE(nm_unregister_value(&init)); RETURN_SIZED_ENUMERATOR(left, 0, 0, 0); // FIXME: Test this. Probably won't work. Enable above code instead. // Figure out default value if none provided by the user if (init == Qnil) { nm_unregister_value(&init); init = rb_yield_values(1, sdata.init_obj()); nm_register_value(&init); } // Allocate a new shape array for the resulting matrix. 
void* init_val = NM_ALLOC(VALUE); memcpy(init_val, &init, sizeof(VALUE)); nm_register_value(&*reinterpret_cast(init_val)); NMATRIX* result = nm_create(nm::LIST_STORE, nm_list_storage_create(nm::RUBYOBJ, sdata.copy_alloc_shape(), s->dim, init_val)); LIST_STORAGE* r = reinterpret_cast(result->storage); nm::list_storage::RecurseData rdata(r, init); nm_register_nmatrix(result); map_stored_r(rdata, sdata, rdata.top_level_list(), sdata.top_level_list(), sdata.dim() - 1); VALUE to_return = Data_Wrap_Struct(CLASS_OF(left), nm_mark, nm_delete, result); nm_unregister_nmatrix(result); nm_unregister_value(&*reinterpret_cast(init_val)); NM_CONSERVATIVE(nm_unregister_value(&init)); NM_CONSERVATIVE(nm_unregister_value(&left)); return to_return; } /* * map merged stored iterator. Always returns a matrix containing RubyObjects which probably needs to be casted. */ VALUE nm_list_map_merged_stored(VALUE left, VALUE right, VALUE init) { NM_CONSERVATIVE(nm_register_value(&left)); NM_CONSERVATIVE(nm_register_value(&right)); NM_CONSERVATIVE(nm_register_value(&init)); bool scalar = false; LIST_STORAGE *s = NM_STORAGE_LIST(left), *t; // For each matrix, if it's a reference, we want to deal directly with the original (with appropriate offsetting) nm::list_storage::RecurseData sdata(s); void* scalar_init = NULL; // right might be a scalar, in which case this is a scalar operation. if (!IsNMatrixType(right)) { nm::dtype_t r_dtype = Upcast[NM_DTYPE(left)][nm_dtype_min(right)]; scalar_init = rubyobj_to_cval(right, r_dtype); // make a copy of right t = reinterpret_cast(nm_list_storage_create(r_dtype, sdata.copy_alloc_shape(), s->dim, scalar_init)); scalar = true; } else { t = NM_STORAGE_LIST(right); // element-wise, not scalar. } //if (!rb_block_given_p()) { // rb_raise(rb_eNotImpError, "RETURN_SIZED_ENUMERATOR probably won't work for a map_merged since no merged object is created"); //} // If we don't have a block, return an enumerator. 
RETURN_SIZED_ENUMERATOR_PRE NM_CONSERVATIVE(nm_unregister_value(&left)); NM_CONSERVATIVE(nm_unregister_value(&right)); NM_CONSERVATIVE(nm_unregister_value(&init)); RETURN_SIZED_ENUMERATOR(left, 0, 0, 0); // FIXME: Test this. Probably won't work. Enable above code instead. // Figure out default value if none provided by the user nm::list_storage::RecurseData& tdata = *(new nm::list_storage::RecurseData(t)); //FIXME: this is a hack to make sure that we can run the destructor before nm_list_storage_delete(t) below. if (init == Qnil) { nm_unregister_value(&init); init = rb_yield_values(2, sdata.init_obj(), tdata.init_obj()); nm_register_value(&init); } // Allocate a new shape array for the resulting matrix. void* init_val = NM_ALLOC(VALUE); memcpy(init_val, &init, sizeof(VALUE)); nm_register_value(&*reinterpret_cast(init_val)); NMATRIX* result = nm_create(nm::LIST_STORE, nm_list_storage_create(nm::RUBYOBJ, sdata.copy_alloc_shape(), s->dim, init_val)); LIST_STORAGE* r = reinterpret_cast(result->storage); nm::list_storage::RecurseData rdata(r, init); map_merged_stored_r(rdata, sdata, tdata, rdata.top_level_list(), sdata.top_level_list(), tdata.top_level_list(), sdata.dim() - 1); delete &tdata; // If we are working with a scalar operation if (scalar) nm_list_storage_delete(t); VALUE to_return = Data_Wrap_Struct(CLASS_OF(left), nm_mark, nm_delete, result); nm_unregister_value(&*reinterpret_cast(init_val)); NM_CONSERVATIVE(nm_unregister_value(&init)); NM_CONSERVATIVE(nm_unregister_value(&right)); NM_CONSERVATIVE(nm_unregister_value(&left)); return to_return; } /* * Copy a slice of a list matrix into a regular list matrix. 
*/ static LIST* slice_copy(const LIST_STORAGE* src, LIST* src_rows, size_t* coords, size_t* lengths, size_t n) { nm_list_storage_register(src); void *val = NULL; int key; LIST* dst_rows = nm::list::create(); NODE* src_node = src_rows->first; std::list temp_vals; std::list temp_lists; while (src_node) { key = src_node->key - (src->offset[n] + coords[n]); if (key >= 0 && (size_t)key < lengths[n]) { if (src->dim - n > 1) { val = slice_copy( src, reinterpret_cast(src_node->val), coords, lengths, n + 1 ); if (val) { if (src->dtype == nm::RUBYOBJ) { nm_list_storage_register_list(reinterpret_cast(val), src->dim - n - 2); temp_lists.push_front(reinterpret_cast(val)); } nm::list::insert_copy(dst_rows, false, key, val, sizeof(LIST)); } } else { // matches src->dim - n > 1 if (src->dtype == nm::RUBYOBJ) { nm_register_value(&*reinterpret_cast(src_node->val)); temp_vals.push_front(reinterpret_cast(src_node->val)); } nm::list::insert_copy(dst_rows, false, key, src_node->val, DTYPE_SIZES[src->dtype]); } } src_node = src_node->next; } if (src->dtype == nm::RUBYOBJ) { __nm_list_storage_unregister_temp_list_list(temp_lists, src->dim - n - 2); __nm_list_storage_unregister_temp_value_list(temp_vals); } nm_list_storage_unregister(src); return dst_rows; } /* * Documentation goes here. */ void* nm_list_storage_get(const STORAGE* storage, SLICE* slice) { LIST_STORAGE* s = (LIST_STORAGE*)storage; LIST_STORAGE* ns = NULL; nm_list_storage_register(s); if (slice->single) { NODE* n = list_storage_get_single_node(s, slice); nm_list_storage_unregister(s); return (n ? 
n->val : s->default_val); } else { void *init_val = NM_ALLOC_N(char, DTYPE_SIZES[s->dtype]); memcpy(init_val, s->default_val, DTYPE_SIZES[s->dtype]); if (s->dtype == nm::RUBYOBJ) nm_register_value(&*reinterpret_cast(init_val)); size_t *shape = NM_ALLOC_N(size_t, s->dim); memcpy(shape, slice->lengths, sizeof(size_t) * s->dim); ns = nm_list_storage_create(s->dtype, shape, s->dim, init_val); ns->rows = slice_copy(s, s->rows, slice->coords, slice->lengths, 0); if (s->dtype == nm::RUBYOBJ) { nm_unregister_value(&*reinterpret_cast(init_val)); } nm_list_storage_unregister(s); return ns; } } /* * Get the contents of some set of coordinates. Note: Does not make a copy! * Don't free! */ void* nm_list_storage_ref(const STORAGE* storage, SLICE* slice) { LIST_STORAGE* s = (LIST_STORAGE*)storage; LIST_STORAGE* ns = NULL; nm_list_storage_register(s); //TODO: It needs a refactoring. if (slice->single) { NODE* n = list_storage_get_single_node(s, slice); nm_list_storage_unregister(s); return (n ? n->val : s->default_val); } else { ns = NM_ALLOC( LIST_STORAGE ); ns->dim = s->dim; ns->dtype = s->dtype; ns->offset = NM_ALLOC_N(size_t, ns->dim); ns->shape = NM_ALLOC_N(size_t, ns->dim); for (size_t i = 0; i < ns->dim; ++i) { ns->offset[i] = slice->coords[i] + s->offset[i]; ns->shape[i] = slice->lengths[i]; } ns->rows = s->rows; ns->default_val = s->default_val; s->src->count++; ns->src = s->src; nm_list_storage_unregister(s); return ns; } } /* * Recursive function, sets multiple values in a matrix from a single source value. 
*/ static void slice_set_single(LIST_STORAGE* dest, LIST* l, void* val, size_t* coords, size_t* lengths, size_t n) { nm_list_storage_register(dest); if (dest->dtype == nm::RUBYOBJ) { nm_register_value(&*reinterpret_cast(val)); nm_list_storage_register_list(l, dest->dim - n - 1); } // drill down into the structure NODE* node = NULL; if (dest->dim - n > 1) { std::list temp_nodes; for (size_t i = 0; i < lengths[n]; ++i) { size_t key = i + dest->offset[n] + coords[n]; if (!node) { // try to insert list node = nm::list::insert(l, false, key, nm::list::create()); } else if (!node->next || (node->next && node->next->key > key)) { node = nm::list::insert_after(node, key, nm::list::create()); } else { node = node->next; // correct rank already exists. } if (dest->dtype == nm::RUBYOBJ) { temp_nodes.push_front(reinterpret_cast(node->val)); nm_list_storage_register_list(reinterpret_cast(node->val), dest->dim - n - 2); } // cast it to a list and recurse slice_set_single(dest, reinterpret_cast(node->val), val, coords, lengths, n + 1); } __nm_list_storage_unregister_temp_list_list(temp_nodes, dest->dim - n - 2); } else { std::list temp_vals; for (size_t i = 0; i < lengths[n]; ++i) { size_t key = i + dest->offset[n] + coords[n]; if (!node) { node = nm::list::insert_copy(l, true, key, val, DTYPE_SIZES[dest->dtype]); } else { node = nm::list::replace_insert_after(node, key, val, true, DTYPE_SIZES[dest->dtype]); } if (dest->dtype == nm::RUBYOBJ) { temp_vals.push_front(reinterpret_cast(node->val)); nm_register_value(&*reinterpret_cast(node->val)); } } __nm_list_storage_unregister_temp_value_list(temp_vals); } nm_list_storage_unregister(dest); if (dest->dtype == nm::RUBYOBJ) { nm_unregister_value(&*reinterpret_cast(val)); nm_list_storage_unregister_list(l, dest->dim - n - 1); } } /* * Set a value or values in a list matrix. 
*/ void nm_list_storage_set(VALUE left, SLICE* slice, VALUE right) { NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::list_storage::set, void, VALUE, SLICE*, VALUE) ttable[NM_DTYPE(left)](left, slice, right); } /* * Insert an entry directly in a row (not using copy! don't free after). * * Returns a pointer to the insertion location. * * TODO: Allow this function to accept an entire row and not just one value -- for slicing */ NODE* nm_list_storage_insert(STORAGE* storage, SLICE* slice, void* val) { LIST_STORAGE* s = (LIST_STORAGE*)storage; nm_list_storage_register(s); if (s->dtype == nm::RUBYOBJ) nm_register_value(&*reinterpret_cast(val)); // Pretend dims = 2 // Then coords is going to be size 2 // So we need to find out if some key already exists size_t r; NODE* n; LIST* l = s->rows; // drill down into the structure for (r = 0; r < s->dim -1; ++r) { n = nm::list::insert(l, false, s->offset[r] + slice->coords[s->dim - r], nm::list::create()); l = reinterpret_cast(n->val); } nm_list_storage_unregister(s); if (s->dtype == nm::RUBYOBJ) nm_unregister_value(&*reinterpret_cast(val)); return nm::list::insert(l, true, s->offset[r] + slice->coords[r], val); } /* * Remove an item or slice from list storage. */ void nm_list_storage_remove(STORAGE* storage, SLICE* slice) { LIST_STORAGE* s = (LIST_STORAGE*)storage; // This returns a boolean, which will indicate whether s->rows is empty. // We can safely ignore it, since we never want to delete s->rows until // it's time to destroy the LIST_STORAGE object. nm::list::remove_recursive(s->rows, slice->coords, s->offset, slice->lengths, 0, s->dim); } /////////// // Tests // /////////// /* * Comparison of contents for list storage. 
*/ bool nm_list_storage_eqeq(const STORAGE* left, const STORAGE* right) { NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::list_storage::eqeq_r, bool, nm::list_storage::RecurseData& left, nm::list_storage::RecurseData& right, const LIST* l, const LIST* r, size_t rec) nm::list_storage::RecurseData ldata(reinterpret_cast(left)), rdata(reinterpret_cast(right)); return ttable[left->dtype][right->dtype](ldata, rdata, ldata.top_level_list(), rdata.top_level_list(), ldata.dim()-1); } ////////// // Math // ////////// /* * List storage matrix multiplication. */ STORAGE* nm_list_storage_matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector) { free(resulting_shape); rb_raise(rb_eNotImpError, "multiplication not implemented for list-of-list matrices"); return NULL; //DTYPE_TEMPLATE_TABLE(dense_storage::matrix_multiply, NMATRIX*, STORAGE_PAIR, size_t*, bool); //return ttable[reinterpret_cast(casted_storage.left)->dtype](casted_storage, resulting_shape, vector); } /* * List storage to Hash conversion. Uses Hashes with default values, so you can continue to pretend * it's a sparse matrix. */ VALUE nm_list_storage_to_hash(const LIST_STORAGE* s, const nm::dtype_t dtype) { nm_list_storage_register(s); // Get the default value for the list storage. VALUE default_value = nm::rubyobj_from_cval(s->default_val, dtype).rval; nm_list_storage_unregister(s); // Recursively copy each dimension of the matrix into a nested hash. return nm_list_copy_to_hash(s->rows, dtype, s->dim - 1, default_value); } ///////////// // Utility // ///////////// /* * Recursively count the non-zero elements in a list storage object. 
*/ size_t nm_list_storage_count_elements_r(const LIST* l, size_t recursions) { size_t count = 0; NODE* curr = l->first; if (recursions) { while (curr) { count += nm_list_storage_count_elements_r(reinterpret_cast(curr->val), recursions - 1); curr = curr->next; } } else { while (curr) { ++count; curr = curr->next; } } return count; } /* * Count non-diagonal non-zero elements. */ size_t nm_list_storage_count_nd_elements(const LIST_STORAGE* s) { NODE *i_curr, *j_curr; size_t count = 0; if (s->dim != 2) { rb_raise(rb_eNotImpError, "non-diagonal element counting only defined for dim = 2"); } for (i_curr = s->rows->first; i_curr; i_curr = i_curr->next) { int i = i_curr->key - s->offset[0]; if (i < 0 || i >= (int)s->shape[0]) continue; for (j_curr = ((LIST*)(i_curr->val))->first; j_curr; j_curr = j_curr->next) { int j = j_curr->key - s->offset[1]; if (j < 0 || j >= (int)s->shape[1]) continue; if (i != j) ++count; } } return count; } ///////////////////////// // Copying and Casting // ///////////////////////// // /* * List storage copy constructor C access. */ LIST_STORAGE* nm_list_storage_copy(const LIST_STORAGE* rhs) { nm_list_storage_register(rhs); size_t *shape = NM_ALLOC_N(size_t, rhs->dim); memcpy(shape, rhs->shape, sizeof(size_t) * rhs->dim); void *init_val = NM_ALLOC_N(char, DTYPE_SIZES[rhs->dtype]); memcpy(init_val, rhs->default_val, DTYPE_SIZES[rhs->dtype]); LIST_STORAGE* lhs = nm_list_storage_create(rhs->dtype, shape, rhs->dim, init_val); nm_list_storage_register(lhs); lhs->rows = slice_copy(rhs, rhs->rows, lhs->offset, lhs->shape, 0); nm_list_storage_unregister(rhs); nm_list_storage_unregister(lhs); return lhs; } /* * List storage copy constructor C access with casting. 
*/ STORAGE* nm_list_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype, void* dummy) { NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::list_storage::cast_copy, LIST_STORAGE*, const LIST_STORAGE* rhs, nm::dtype_t new_dtype); return (STORAGE*)ttable[new_dtype][rhs->dtype]((LIST_STORAGE*)rhs, new_dtype); } /* * List storage copy constructor for transposing. */ STORAGE* nm_list_storage_copy_transposed(const STORAGE* rhs_base) { rb_raise(rb_eNotImpError, "list storage transpose not yet implemented"); return NULL; } } // end of extern "C" block ///////////////////////// // Templated Functions // ///////////////////////// namespace nm { namespace list_storage { /* * List storage copy constructor for changing dtypes. */ template static LIST_STORAGE* cast_copy(const LIST_STORAGE* rhs, dtype_t new_dtype) { nm_list_storage_register(rhs); // allocate and copy shape size_t* shape = NM_ALLOC_N(size_t, rhs->dim); memcpy(shape, rhs->shape, rhs->dim * sizeof(size_t)); // copy default value LDType* default_val = NM_ALLOC_N(LDType, 1); *default_val = *reinterpret_cast(rhs->default_val); LIST_STORAGE* lhs = nm_list_storage_create(new_dtype, shape, rhs->dim, default_val); //lhs->rows = nm::list::create(); nm_list_storage_register(lhs); // TODO: Needs optimization. When matrix is reference it is copped twice. if (rhs->src == rhs) nm::list::cast_copy_contents(lhs->rows, rhs->rows, rhs->dim - 1); else { LIST_STORAGE *tmp = nm_list_storage_copy(rhs); nm_list_storage_register(tmp); nm::list::cast_copy_contents(lhs->rows, tmp->rows, rhs->dim - 1); nm_list_storage_unregister(tmp); nm_list_storage_delete(tmp); } nm_list_storage_unregister(lhs); nm_list_storage_unregister(rhs); return lhs; } /* * Recursive helper function for eqeq. Note that we use SDType and TDType instead of L and R because this function * is a re-labeling. That is, it can be called in order L,R or order R,L; and we don't want to get confused. So we * use S and T to denote first and second passed in. 
*/ template static bool eqeq_empty_r(RecurseData& s, const LIST* l, size_t rec, const TDType* t_init) { NODE* curr = l->first; // For reference matrices, make sure we start in the correct place. while (curr && curr->key < s.offset(rec)) { curr = curr->next; } if (curr && curr->key - s.offset(rec) >= s.ref_shape(rec)) curr = NULL; if (rec) { while (curr) { if (!eqeq_empty_r(s, reinterpret_cast(curr->val), rec-1, t_init)) return false; curr = curr->next; if (curr && curr->key - s.offset(rec) >= s.ref_shape(rec)) curr = NULL; } } else { while (curr) { if (*reinterpret_cast(curr->val) != *t_init) return false; curr = curr->next; if (curr && curr->key - s.offset(rec) >= s.ref_shape(rec)) curr = NULL; } } return true; } /* * Do these two list matrices of the same dtype have exactly the same contents (accounting for default_vals)? * * This function is recursive. */ template static bool eqeq_r(RecurseData& left, RecurseData& right, const LIST* l, const LIST* r, size_t rec) { NODE *lcurr = l->first, *rcurr = r->first; // For reference matrices, make sure we start in the correct place. 
while (lcurr && lcurr->key < left.offset(rec)) { lcurr = lcurr->next; } while (rcurr && rcurr->key < right.offset(rec)) { rcurr = rcurr->next; } if (rcurr && rcurr->key - right.offset(rec) >= left.ref_shape(rec)) rcurr = NULL; if (lcurr && lcurr->key - left.offset(rec) >= left.ref_shape(rec)) lcurr = NULL; bool compared = false; if (rec) { while (lcurr || rcurr) { if (!rcurr || (lcurr && (lcurr->key - left.offset(rec) < rcurr->key - right.offset(rec)))) { if (!eqeq_empty_r(left, reinterpret_cast(lcurr->val), rec-1, reinterpret_cast(right.init()))) return false; lcurr = lcurr->next; } else if (!lcurr || (rcurr && (rcurr->key - right.offset(rec) < lcurr->key - left.offset(rec)))) { if (!eqeq_empty_r(right, reinterpret_cast(rcurr->val), rec-1, reinterpret_cast(left.init()))) return false; rcurr = rcurr->next; } else { // keys are == and both present if (!eqeq_r(left, right, reinterpret_cast(lcurr->val), reinterpret_cast(rcurr->val), rec-1)) return false; lcurr = lcurr->next; rcurr = rcurr->next; } if (rcurr && rcurr->key - right.offset(rec) >= right.ref_shape(rec)) rcurr = NULL; if (lcurr && lcurr->key - left.offset(rec) >= left.ref_shape(rec)) lcurr = NULL; compared = true; } } else { while (lcurr || rcurr) { if (rcurr && rcurr->key - right.offset(rec) >= left.ref_shape(rec)) rcurr = NULL; if (lcurr && lcurr->key - left.offset(rec) >= left.ref_shape(rec)) lcurr = NULL; if (!rcurr || (lcurr && (lcurr->key - left.offset(rec) < rcurr->key - right.offset(rec)))) { if (*reinterpret_cast(lcurr->val) != *reinterpret_cast(right.init())) return false; lcurr = lcurr->next; } else if (!lcurr || (rcurr && (rcurr->key - right.offset(rec) < lcurr->key - left.offset(rec)))) { if (*reinterpret_cast(rcurr->val) != *reinterpret_cast(left.init())) return false; rcurr = rcurr->next; } else { // keys == and both left and right nodes present if (*reinterpret_cast(lcurr->val) != *reinterpret_cast(rcurr->val)) return false; lcurr = lcurr->next; rcurr = rcurr->next; } if (rcurr && rcurr->key 
- right.offset(rec) >= right.ref_shape(rec)) rcurr = NULL; if (lcurr && lcurr->key - left.offset(rec) >= left.ref_shape(rec)) lcurr = NULL; compared = true; } } // Final condition: both containers are empty, and have different default values. if (!compared && !lcurr && !rcurr) return *reinterpret_cast(left.init()) == *reinterpret_cast(right.init()); return true; } }} // end of namespace nm::list_storage extern "C" { /* * call-seq: * __list_to_hash__ -> Hash * * Create a Ruby Hash from a list NMatrix. * * This is an internal C function which handles list stype only. */ VALUE nm_to_hash(VALUE self) { return nm_list_storage_to_hash(NM_STORAGE_LIST(self), NM_DTYPE(self)); } /* * call-seq: * __list_default_value__ -> ... * * Get the default_value property from a list matrix. */ VALUE nm_list_default_value(VALUE self) { NM_CONSERVATIVE(nm_register_value(&self)); VALUE to_return = (NM_DTYPE(self) == nm::RUBYOBJ) ? *reinterpret_cast(NM_DEFAULT_VAL(self)) : nm::rubyobj_from_cval(NM_DEFAULT_VAL(self), NM_DTYPE(self)).rval; NM_CONSERVATIVE(nm_unregister_value(&self)); return to_return; } } // end of extern "C" block ================================================ FILE: ext/nmatrix/storage/list/list.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. 
// // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == list.h // // List-of-lists n-dimensional matrix storage. Uses singly-linked // lists. #ifndef LIST_H #define LIST_H /* * Standard Includes */ #include #include #include /* * Project Includes */ #include "types.h" #include "data/data.h" #include "../common.h" #include "util/sl_list.h" #include "nmatrix.h" /* * Macros */ /* * Types */ /* * Data */ extern "C" { /* * Functions */ //////////////// // Lifecycle // /////////////// LIST_STORAGE* nm_list_storage_create(nm::dtype_t dtype, size_t* shape, size_t dim, void* init_val); void nm_list_storage_delete(STORAGE* s); void nm_list_storage_delete_ref(STORAGE* s); void nm_list_storage_mark(STORAGE*); void nm_list_storage_register(const STORAGE* s); void nm_list_storage_unregister(const STORAGE* s); void nm_list_storage_register_list(const LIST* l, size_t recursions); void nm_list_storage_unregister_list(const LIST* l, size_t recursions); void nm_list_storage_register_node(const NODE* n); void nm_list_storage_unregister_node(const NODE* n); void nm_list_storage_completely_unregister_node(const NODE* curr); /////////////// // Accessors // /////////////// VALUE nm_list_each_with_indices(VALUE nmatrix, bool stored); void* nm_list_storage_ref(const STORAGE* s, SLICE* slice); void* nm_list_storage_get(const STORAGE* s, SLICE* slice); NODE* nm_list_storage_insert(STORAGE* s, SLICE* slice, void* val); void nm_list_storage_set(VALUE left, SLICE* slice, VALUE right); void nm_list_storage_remove(STORAGE* s, SLICE* slice); /////////// // Tests // /////////// bool nm_list_storage_eqeq(const STORAGE* left, const STORAGE* right); ////////// // Math // ////////// STORAGE* nm_list_storage_matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector); ///////////// // Utility // ///////////// size_t 
nm_list_storage_count_elements_r(const LIST* l, size_t recursions); size_t nm_list_storage_count_nd_elements(const LIST_STORAGE* s); /* * Count non-zero elements. See also count_list_storage_nd_elements. */ inline size_t nm_list_storage_count_elements(const LIST_STORAGE* s) { return nm_list_storage_count_elements_r(s->rows, s->dim - 1); } ///////////////////////// // Copying and Casting // ///////////////////////// LIST_STORAGE* nm_list_storage_copy(const LIST_STORAGE* rhs); STORAGE* nm_list_storage_copy_transposed(const STORAGE* rhs_base); STORAGE* nm_list_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype, void*); VALUE nm_list_storage_to_hash(const LIST_STORAGE* s, const nm::dtype_t dtype); // Exposed functions VALUE nm_to_hash(VALUE self); VALUE nm_list_map_merged_stored(VALUE left, VALUE right, VALUE init); VALUE nm_list_map_stored(VALUE left, VALUE init); VALUE nm_list_default_value(VALUE self); } // end of extern "C" block #endif // LIST_H ================================================ FILE: ext/nmatrix/storage/storage.cpp ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == storage.cpp // // Code that is used by or involves more then one storage type. 
/*
 * Standard Includes
 */

/*
 * Project Includes
 */

#include "data/data.h"
#include "storage.h"
#include "common.h"

/*
 * Macros
 */

/*
 * Global Variables
 */

extern "C" {
  // Human-readable stype names, indexed by nm::stype_t.
  const char* const STYPE_NAMES[nm::NUM_STYPES] = {
    "dense",
    "list",
    "yale"
  };
} // end extern "C" block

/*
 * Forward Declarations
 */

namespace nm {

/*
 * Functions
 */

/////////////////////////
// Templated Functions //
/////////////////////////

namespace dense_storage {

  // Recursive helpers for the list -> dense conversion (defined further down).
  template <typename LDType, typename RDType>
  static void cast_copy_list_contents(LDType* lhs, const LIST* rhs, RDType* default_val, size_t& pos, const size_t* shape, size_t dim, size_t max_elements, size_t recursions);

  template <typename LDType, typename RDType>
  static void cast_copy_list_default(LDType* lhs, RDType* default_val, size_t& pos, const size_t* shape, size_t dim, size_t max_elements, size_t recursions);

  /*
   * Convert (by creating a copy) from list storage to dense storage.
   *
   * LDType is the dtype of the dense matrix being created; RDType is the
   * dtype of the source list matrix.
   */
  template <typename LDType, typename RDType>
  DENSE_STORAGE* create_from_list_storage(const LIST_STORAGE* rhs, dtype_t l_dtype) {

    nm_list_storage_register(rhs);

    // Allocate a copy of the source shape for the new dense storage.
    size_t* shape = NM_ALLOC_N(size_t, rhs->dim);
    memcpy(shape, rhs->shape, rhs->dim * sizeof(size_t));

    DENSE_STORAGE* lhs = nm_dense_storage_create(l_dtype, shape, rhs->dim, NULL, 0);

    size_t pos          = 0; // position in lhs->elements
    size_t max_elements = nm_storage_count_max_elements(rhs);

    // Recursively copy the contents. A slice (rhs->src != rhs) is first
    // materialized with nm_list_storage_copy so that the recursive walk does
    // not need to deal with offsets.
    if (rhs->src == rhs)
      cast_copy_list_contents<LDType,RDType>(reinterpret_cast<LDType*>(lhs->elements),
                                             rhs->rows,
                                             reinterpret_cast<RDType*>(rhs->default_val),
                                             pos, shape, lhs->dim, max_elements, rhs->dim-1);
    else {
      LIST_STORAGE* tmp = nm_list_storage_copy(rhs);
      cast_copy_list_contents<LDType,RDType>(reinterpret_cast<LDType*>(lhs->elements),
                                             tmp->rows,
                                             reinterpret_cast<RDType*>(tmp->default_val),
                                             pos, shape, lhs->dim, max_elements, tmp->dim-1);
      nm_list_storage_delete(tmp);
    }

    nm_list_storage_unregister(rhs);

    return lhs;
  }


  /*
   * Create/allocate dense storage, copying into it the contents of a Yale matrix.
   */
  template <typename LDType, typename RDType>
  DENSE_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype) {

    nm_yale_storage_register(rhs);

    // The IJA and A arrays always live on the source (non-slice) matrix.
    IType*  rhs_ija = reinterpret_cast<YALE_STORAGE*>(rhs->src)->ija;
    RDType* rhs_a   = reinterpret_cast<RDType*>(reinterpret_cast<YALE_STORAGE*>(rhs->src)->a);

    // Allocate and set shape.
    size_t* shape = NM_ALLOC_N(size_t, rhs->dim);
    shape[0] = rhs->shape[0];
    shape[1] = rhs->shape[1];

    DENSE_STORAGE* lhs   = nm_dense_storage_create(l_dtype, shape, rhs->dim, NULL, 0);
    LDType* lhs_elements = reinterpret_cast<LDType*>(lhs->elements);

    size_t pos = 0; // position in dense to write to

    // The Yale default ("zero") entry is stored just past the diagonal.
    LDType LCAST_ZERO = rhs_a[rhs->src->shape[0]];

    // Walk through rows. For each entry we set in dense, increment pos.
    for (size_t i = 0; i < shape[0]; ++i) {
      IType ri = i + rhs->offset[0];

      if (rhs_ija[ri] == rhs_ija[ri+1]) {
        // Row has no stored non-diagonal entries: fill with the default value,
        // except for the separately-stored diagonal entry.
        for (size_t j = 0; j < shape[1]; ++j) {
          if (ri == j + rhs->offset[1]) lhs_elements[pos] = static_cast<LDType>(rhs_a[ri]);
          else                          lhs_elements[pos] = LCAST_ZERO;

          ++pos; // move to next dense position
        }

      } else {
        // Row contains entries: write those in each column, interspersed with
        // default values.

        // First ija position of the row (as sliced).
        IType ija = nm::yale_storage::binary_search_left_boundary(rhs, rhs_ija[ri], rhs_ija[ri+1]-1, rhs->offset[1]);

        // Column of the next stored non-diagonal entry.
        IType next_stored_rj = rhs_ija[ija];

        for (size_t j = 0; j < shape[1]; ++j) {
          IType rj = j + rhs->offset[1];

          if (rj == ri) {                    // at a stored diagonal in RHS
            lhs_elements[pos] = static_cast<LDType>(rhs_a[ri]);
          } else if (rj == next_stored_rj) { // column ID was found in RHS
            lhs_elements[pos] = static_cast<LDType>(rhs_a[ija]); // copy from rhs

            // Increment to next stored column ID (or go off the end).
            ++ija;
            if (ija < rhs_ija[ri+1]) next_stored_rj = rhs_ija[ija];
            else                     next_stored_rj = rhs->src->shape[1];
          } else {                           // rj < next_stored_rj: insert default
            lhs_elements[pos] = LCAST_ZERO;
          }

          ++pos; // move to next dense position
        }
      }
    }

    nm_yale_storage_unregister(rhs);

    return lhs;
  }

  /*
   * Copy list contents into dense recursively
*/ template static void cast_copy_list_contents(LDType* lhs, const LIST* rhs, RDType* default_val, size_t& pos, const size_t* shape, size_t dim, size_t max_elements, size_t recursions) { NODE *curr = rhs->first; int last_key = -1; nm_list_storage_register_list(rhs, recursions); for (size_t i = 0; i < shape[dim - 1 - recursions]; ++i, ++pos) { if (!curr || (curr->key > (size_t)(last_key+1))) { if (recursions == 0) lhs[pos] = static_cast(*default_val); else cast_copy_list_default(lhs, default_val, pos, shape, dim, max_elements, recursions-1); ++last_key; } else { if (recursions == 0) lhs[pos] = static_cast(*reinterpret_cast(curr->val)); else cast_copy_list_contents(lhs, (const LIST*)(curr->val), default_val, pos, shape, dim, max_elements, recursions-1); last_key = curr->key; curr = curr->next; } } nm_list_storage_unregister_list(rhs, recursions); --pos; } /* * Copy a set of default values into dense. */ template static void cast_copy_list_default(LDType* lhs, RDType* default_val, size_t& pos, const size_t* shape, size_t dim, size_t max_elements, size_t recursions) { for (size_t i = 0; i < shape[dim - 1 - recursions]; ++i, ++pos) { if (recursions == 0) lhs[pos] = static_cast(*default_val); else cast_copy_list_default(lhs, default_val, pos, shape, dim, max_elements, recursions-1); } --pos; } } // end of namespace dense_storage namespace list_storage { template static bool cast_copy_contents_dense(LIST* lhs, const RDType* rhs, RDType* zero, size_t& pos, size_t* coords, const size_t* shape, size_t dim, size_t recursions); /* * Creation of list storage from dense storage. 
*/ template LIST_STORAGE* create_from_dense_storage(const DENSE_STORAGE* rhs, dtype_t l_dtype, void* init) { nm_dense_storage_register(rhs); LDType* l_default_val = NM_ALLOC_N(LDType, 1); RDType* r_default_val = NM_ALLOCA_N(RDType, 1); // clean up when finished with this function // allocate and copy shape and coords size_t *shape = NM_ALLOC_N(size_t, rhs->dim), *coords = NM_ALLOC_N(size_t, rhs->dim); memcpy(shape, rhs->shape, rhs->dim * sizeof(size_t)); memset(coords, 0, rhs->dim * sizeof(size_t)); // set list default_val to 0 if (init) *l_default_val = *reinterpret_cast(init); else { if (l_dtype == RUBYOBJ) *l_default_val = INT2FIX(0); else *l_default_val = 0; } // need test default value for comparing to elements in dense matrix if (rhs->dtype == l_dtype || rhs->dtype != RUBYOBJ) *r_default_val = static_cast(*l_default_val); else *r_default_val = nm::rubyobj_from_cval(l_default_val, l_dtype); LIST_STORAGE* lhs = nm_list_storage_create(l_dtype, shape, rhs->dim, l_default_val); nm_list_storage_register(lhs); size_t pos = 0; if (rhs->src == rhs) list_storage::cast_copy_contents_dense(lhs->rows, reinterpret_cast(rhs->elements), r_default_val, pos, coords, rhs->shape, rhs->dim, rhs->dim - 1); else { DENSE_STORAGE* tmp = nm_dense_storage_copy(rhs); list_storage::cast_copy_contents_dense(lhs->rows, reinterpret_cast(tmp->elements), r_default_val, pos, coords, rhs->shape, rhs->dim, rhs->dim - 1); nm_dense_storage_delete(tmp); } nm_list_storage_unregister(lhs); nm_dense_storage_unregister(rhs); return lhs; } /* * Creation of list storage from yale storage. 
*/ template LIST_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype) { // allocate and copy shape nm_yale_storage_register(rhs); size_t *shape = NM_ALLOC_N(size_t, rhs->dim); shape[0] = rhs->shape[0]; shape[1] = rhs->shape[1]; RDType* rhs_a = reinterpret_cast(reinterpret_cast(rhs->src)->a); RDType R_ZERO = rhs_a[ rhs->src->shape[0] ]; // copy default value from the zero location in the Yale matrix LDType* default_val = NM_ALLOC_N(LDType, 1); *default_val = static_cast(R_ZERO); LIST_STORAGE* lhs = nm_list_storage_create(l_dtype, shape, rhs->dim, default_val); if (rhs->dim != 2) rb_raise(nm_eStorageTypeError, "Can only convert matrices of dim 2 from yale."); IType* rhs_ija = reinterpret_cast(rhs->src)->ija; NODE *last_row_added = NULL; // Walk through rows and columns as if RHS were a dense matrix for (IType i = 0; i < shape[0]; ++i) { IType ri = i + rhs->offset[0]; NODE *last_added = NULL; // Get boundaries of beginning and end of row IType ija = rhs_ija[ri], ija_next = rhs_ija[ri+1]; // Are we going to need to add a diagonal for this row? bool add_diag = false; if (rhs_a[ri] != R_ZERO) add_diag = true; // non-zero and located within the bounds of the slice if (ija < ija_next || add_diag) { ija = nm::yale_storage::binary_search_left_boundary(rhs, ija, ija_next-1, rhs->offset[1]); LIST* curr_row = list::create(); LDType* insert_val; while (ija < ija_next) { // Find first column in slice IType rj = rhs_ija[ija]; IType j = rj - rhs->offset[1]; // Is there a nonzero diagonal item between the previously added item and the current one? if (rj > ri && add_diag) { // Allocate and copy insertion value insert_val = NM_ALLOC_N(LDType, 1); *insert_val = static_cast(rhs_a[ri]); // Insert the item in the list at the appropriate location. // What is the appropriate key? Well, it's definitely right(i)==right(j), but the // rj index has already been advanced past ri. So we should treat ri as the column and // subtract offset[1]. 
if (last_added) last_added = list::insert_after(last_added, ri - rhs->offset[1], insert_val); else last_added = list::insert(curr_row, false, ri - rhs->offset[1], insert_val); // don't add again! add_diag = false; } // now allocate and add the current item insert_val = NM_ALLOC_N(LDType, 1); *insert_val = static_cast(rhs_a[ija]); if (last_added) last_added = list::insert_after(last_added, j, insert_val); else last_added = list::insert(curr_row, false, j, insert_val); ++ija; // move to next entry in Yale matrix } if (add_diag) { // still haven't added the diagonal. insert_val = NM_ALLOC_N(LDType, 1); *insert_val = static_cast(rhs_a[ri]); // insert the item in the list at the appropriate location if (last_added) last_added = list::insert_after(last_added, ri - rhs->offset[1], insert_val); else last_added = list::insert(curr_row, false, ri - rhs->offset[1], insert_val); // no need to set add_diag to false because it'll be reset automatically in next iteration. } // Now add the list at the appropriate location if (last_row_added) last_row_added = list::insert_after(last_row_added, i, curr_row); else last_row_added = list::insert(lhs->rows, false, i, curr_row); } // end of walk through rows } nm_yale_storage_unregister(rhs); return lhs; } /* Copy dense into lists recursively * * FIXME: This works, but could probably be cleaner (do we really need to pass coords around?) 
*/ template static bool cast_copy_contents_dense(LIST* lhs, const RDType* rhs, RDType* zero, size_t& pos, size_t* coords, const size_t* shape, size_t dim, size_t recursions) { nm_list_storage_register_list(lhs, recursions); NODE *prev = NULL; LIST *sub_list; bool added = false, added_list = false; //void* insert_value; for (coords[dim-1-recursions] = 0; coords[dim-1-recursions] < shape[dim-1-recursions]; ++coords[dim-1-recursions], ++pos) { if (recursions == 0) { // create nodes if (rhs[pos] != *zero) { // is not zero // Create a copy of our value that we will insert in the list LDType* insert_value = NM_ALLOC_N(LDType, 1); *insert_value = static_cast(rhs[pos]); if (!lhs->first) prev = list::insert(lhs, false, coords[dim-1-recursions], insert_value); else prev = list::insert_after(prev, coords[dim-1-recursions], insert_value); added = true; } // no need to do anything if the element is zero } else { // create lists // create a list as if there's something in the row in question, and then delete it if nothing turns out to be there sub_list = list::create(); added_list = list_storage::cast_copy_contents_dense(sub_list, rhs, zero, pos, coords, shape, dim, recursions-1); if (!added_list) list::del(sub_list, recursions-1); else if (!lhs->first) prev = list::insert(lhs, false, coords[dim-1-recursions], sub_list); else prev = list::insert_after(prev, coords[dim-1-recursions], sub_list); // added = (added || added_list); } } nm_list_storage_unregister_list(lhs, recursions); coords[dim-1-recursions] = 0; --pos; return added; } } // end of namespace list_storage namespace yale_storage { // FIXME: Move to yale.cpp /* * Creation of yale storage from dense storage. */ template YALE_STORAGE* create_from_dense_storage(const DENSE_STORAGE* rhs, dtype_t l_dtype, void* init) { if (rhs->dim != 2) rb_raise(nm_eStorageTypeError, "can only convert matrices of dim 2 to yale"); nm_dense_storage_register(rhs); IType pos = 0; IType ndnz = 0; // We need a zero value. 
This should nearly always be zero, but sometimes you might want false or nil. LDType L_INIT(0); if (init) { if (l_dtype == RUBYOBJ) L_INIT = *reinterpret_cast(init); else L_INIT = *reinterpret_cast(init); } RDType R_INIT = static_cast(L_INIT); RDType* rhs_elements = reinterpret_cast(rhs->elements); // First, count the non-diagonal nonzeros for (size_t i = rhs->shape[0]; i-- > 0;) { for (size_t j = rhs->shape[1]; j-- > 0;) { pos = rhs->stride[0]*(i + rhs->offset[0]) + rhs->stride[1]*(j + rhs->offset[1]); if (i != j && rhs_elements[pos] != R_INIT) ++ndnz; // move forward 1 position in dense matrix elements array } } // Copy shape for yale construction size_t* shape = NM_ALLOC_N(size_t, 2); shape[0] = rhs->shape[0]; shape[1] = rhs->shape[1]; size_t request_capacity = shape[0] + ndnz + 1; // Create with minimum possible capacity -- just enough to hold all of the entries YALE_STORAGE* lhs = nm_yale_storage_create(l_dtype, shape, 2, request_capacity); if (lhs->capacity < request_capacity) rb_raise(nm_eStorageTypeError, "conversion failed; capacity of %ld requested, max allowable is %ld", (unsigned long)request_capacity, (unsigned long)(lhs->capacity)); LDType* lhs_a = reinterpret_cast(lhs->a); IType* lhs_ija = lhs->ija; // Set the zero position in the yale matrix lhs_a[shape[0]] = L_INIT; // Start just after the zero position. 
IType ija = shape[0]+1; pos = 0; // Copy contents for (IType i = 0; i < rhs->shape[0]; ++i) { // indicate the beginning of a row in the IJA array lhs_ija[i] = ija; for (IType j = 0; j < rhs->shape[1]; ++j) { pos = rhs->stride[0] * (i + rhs->offset[0]) + rhs->stride[1] * (j + rhs->offset[1]); // calc position with offsets if (i == j) { // copy to diagonal lhs_a[i] = static_cast(rhs_elements[pos]); } else if (rhs_elements[pos] != R_INIT) { // copy nonzero to LU lhs_ija[ija] = j; // write column index lhs_a[ija] = static_cast(rhs_elements[pos]); ++ija; } } } lhs_ija[shape[0]] = ija; // indicate the end of the last row lhs->ndnz = ndnz; nm_dense_storage_unregister(rhs); return lhs; } /* * Creation of yale storage from list storage. */ template YALE_STORAGE* create_from_list_storage(const LIST_STORAGE* rhs, nm::dtype_t l_dtype) { if (rhs->dim != 2) rb_raise(nm_eStorageTypeError, "can only convert matrices of dim 2 to yale"); if (rhs->dtype == RUBYOBJ) { VALUE init_val = *reinterpret_cast(rhs->default_val); if (rb_funcall(init_val, rb_intern("!="), 1, Qnil) == Qtrue && rb_funcall(init_val, rb_intern("!="), 1, Qfalse) == Qtrue && rb_funcall(init_val, rb_intern("!="), 1, INT2FIX(0)) == Qtrue) rb_raise(nm_eStorageTypeError, "list matrix of Ruby objects must have default value equal to 0, nil, or false to convert to yale"); } else if (strncmp(reinterpret_cast(rhs->default_val), "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", DTYPE_SIZES[rhs->dtype])) rb_raise(nm_eStorageTypeError, "list matrix of non-Ruby objects must have default value of 0 to convert to yale"); nm_list_storage_register(rhs); size_t ndnz = nm_list_storage_count_nd_elements(rhs); // Copy shape for yale construction size_t* shape = NM_ALLOC_N(size_t, 2); shape[0] = rhs->shape[0]; shape[1] = rhs->shape[1]; size_t request_capacity = shape[0] + ndnz + 1; YALE_STORAGE* lhs = nm_yale_storage_create(l_dtype, shape, 2, request_capacity); if (lhs->capacity < request_capacity) rb_raise(nm_eStorageTypeError, "conversion failed; 
capacity of %ld requested, max allowable is %ld", (unsigned long)request_capacity, (unsigned long)(lhs->capacity)); // Initialize the A and IJA arrays init(lhs, rhs->default_val); IType* lhs_ija = lhs->ija; LDType* lhs_a = reinterpret_cast(lhs->a); IType ija = lhs->shape[0]+1; // Copy contents for (NODE* i_curr = rhs->rows->first; i_curr; i_curr = i_curr->next) { // Shrink reference int i = i_curr->key - rhs->offset[0]; if (i < 0 || i >= (int)rhs->shape[0]) continue; for (NODE* j_curr = ((LIST*)(i_curr->val))->first; j_curr; j_curr = j_curr->next) { // Shrink reference int j = j_curr->key - rhs->offset[1]; if (j < 0 || j >= (int)rhs->shape[1]) continue; LDType cast_jcurr_val = *reinterpret_cast(j_curr->val); if (i_curr->key - rhs->offset[0] == j_curr->key - rhs->offset[1]) lhs_a[i_curr->key - rhs->offset[0]] = cast_jcurr_val; // set diagonal else { lhs_ija[ija] = j_curr->key - rhs->offset[1]; // set column value lhs_a[ija] = cast_jcurr_val; // set cell value ++ija; // indicate the beginning of a row in the IJA array for (size_t i = i_curr->key - rhs->offset[0] + 1; i < rhs->shape[0] + rhs->offset[0]; ++i) { lhs_ija[i] = ija; } } } } lhs_ija[rhs->shape[0]] = ija; // indicate the end of the last row lhs->ndnz = ndnz; nm_list_storage_unregister(rhs); return lhs; } } // end of namespace yale_storage } // end of namespace nm extern "C" { /* * The following functions represent stype casts -- conversions from one * stype to another. Each of these is the C accessor for a templated C++ * function. 
*/ STORAGE* nm_yale_storage_from_dense(const STORAGE* right, nm::dtype_t l_dtype, void* init) { NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::create_from_dense_storage, YALE_STORAGE*, const DENSE_STORAGE* rhs, nm::dtype_t l_dtype, void*); if (!ttable[l_dtype][right->dtype]) { rb_raise(nm_eDataTypeError, "casting between these dtypes is undefined"); return NULL; } return (STORAGE*)ttable[l_dtype][right->dtype]((const DENSE_STORAGE*)right, l_dtype, init); } STORAGE* nm_yale_storage_from_list(const STORAGE* right, nm::dtype_t l_dtype, void* dummy) { NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::create_from_list_storage, YALE_STORAGE*, const LIST_STORAGE* rhs, nm::dtype_t l_dtype); if (!ttable[l_dtype][right->dtype]) { rb_raise(nm_eDataTypeError, "casting between these dtypes is undefined"); return NULL; } return (STORAGE*)ttable[l_dtype][right->dtype]((const LIST_STORAGE*)right, l_dtype); } STORAGE* nm_dense_storage_from_list(const STORAGE* right, nm::dtype_t l_dtype, void* dummy) { NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::create_from_list_storage, DENSE_STORAGE*, const LIST_STORAGE* rhs, nm::dtype_t l_dtype); if (!ttable[l_dtype][right->dtype]) { rb_raise(nm_eDataTypeError, "casting between these dtypes is undefined"); return NULL; } return (STORAGE*)ttable[l_dtype][right->dtype]((const LIST_STORAGE*)right, l_dtype); } STORAGE* nm_dense_storage_from_yale(const STORAGE* right, nm::dtype_t l_dtype, void* dummy) { NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::create_from_yale_storage, DENSE_STORAGE*, const YALE_STORAGE* rhs, nm::dtype_t l_dtype); const YALE_STORAGE* casted_right = reinterpret_cast(right); if (!ttable[l_dtype][right->dtype]) { rb_raise(nm_eDataTypeError, "casting between these dtypes is undefined"); return NULL; } return reinterpret_cast(ttable[l_dtype][right->dtype](casted_right, l_dtype)); } STORAGE* nm_list_storage_from_dense(const STORAGE* right, nm::dtype_t l_dtype, void* init) { 
NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::list_storage::create_from_dense_storage, LIST_STORAGE*, const DENSE_STORAGE*, nm::dtype_t, void*); if (!ttable[l_dtype][right->dtype]) { rb_raise(nm_eDataTypeError, "casting between these dtypes is undefined"); return NULL; } return (STORAGE*)ttable[l_dtype][right->dtype]((DENSE_STORAGE*)right, l_dtype, init); } STORAGE* nm_list_storage_from_yale(const STORAGE* right, nm::dtype_t l_dtype, void* dummy) { NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::list_storage::create_from_yale_storage, LIST_STORAGE*, const YALE_STORAGE* rhs, nm::dtype_t l_dtype); const YALE_STORAGE* casted_right = reinterpret_cast(right); if (!ttable[l_dtype][right->dtype]) { rb_raise(nm_eDataTypeError, "casting between these dtypes is undefined"); return NULL; } return (STORAGE*)ttable[l_dtype][right->dtype](casted_right, l_dtype); } } // end of extern "C" ================================================ FILE: ext/nmatrix/storage/storage.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == storage.h // // This file brings together everything in the storage directory. It should not // be included by anything in the storage directory, but should be included by // files needing to use the storage code. 
#ifndef STORAGE_H #define STORAGE_H /* * Standard Includes */ #include #include /* * Project Includes */ #include "types.h" #include "data/data.h" #include "common.h" #include "dense/dense.h" #include "list/list.h" #include "yale/yale.h" /* * Macros */ #define NMATRIX_DTYPE_IS_COMPLEX(s) ((s->dtype == nm::COMPLEX64) or (s->dtype == nm::COMPLEX128)) #define NMATRIX_DTYPE_IS_FLOAT(s) ((s->dtype == nm::FLOAT32) or (s->dtype == nm::FLOAT64)) #define NMATRIX_DTYPE_IS_INTEGER(s) (s->dtype <= nm::INT64) #define NMATRIX_DTYPE_IS_RUBYOBJ(s) (s->dtype == nm::RUBYOBJ) /* * Types */ /* * Data */ namespace nm { const int NUM_STYPES = 3; } extern "C" { extern const char* const STYPE_NAMES[nm::NUM_STYPES]; extern void (* const STYPE_MARK[nm::NUM_STYPES])(STORAGE*); /* * Functions */ ///////////////////////// // Copying and Casting // ///////////////////////// STORAGE* nm_dense_storage_from_list(const STORAGE* right, nm::dtype_t l_dtype, void*); STORAGE* nm_dense_storage_from_yale(const STORAGE* right, nm::dtype_t l_dtype, void*); STORAGE* nm_list_storage_from_dense(const STORAGE* right, nm::dtype_t l_dtype, void*); STORAGE* nm_list_storage_from_yale(const STORAGE* right, nm::dtype_t l_dtype, void*); STORAGE* nm_yale_storage_from_list(const STORAGE* right, nm::dtype_t l_dtype, void*); STORAGE* nm_yale_storage_from_dense(const STORAGE* right, nm::dtype_t l_dtype, void*); } // end of extern "C" block #endif // STORAGE_H ================================================ FILE: ext/nmatrix/storage/yale/class.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. 
// // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == class.h // // Object-oriented interface for Yale. // #ifndef YALE_CLASS_H # define YALE_CLASS_H #include "../dense/dense.h" #include "math/transpose.h" #include "yale.h" namespace nm { /* * This class is basically an intermediary for YALE_STORAGE objects which enables us to treat it like a C++ object. It * keeps the src pointer as its s, along with other relevant slice information. * * It's useful for creating iterators and such. It isn't responsible for allocating or freeing its YALE_STORAGE* pointers. 
*/ template class YaleStorage { public: YaleStorage(const YALE_STORAGE* storage) : s(reinterpret_cast(storage->src)), slice(storage != storage->src), slice_shape(storage->shape), slice_offset(storage->offset) { nm_yale_storage_register(storage->src); } YaleStorage(const STORAGE* storage) : s(reinterpret_cast(storage->src)), slice(storage != storage->src), slice_shape(storage->shape), slice_offset(storage->offset) { nm_yale_storage_register(reinterpret_cast(storage->src)); } ~YaleStorage() { nm_yale_storage_unregister(s); } /* Allows us to do YaleStorage::dtype() to get an nm::dtype_t */ static nm::dtype_t dtype() { return nm::ctype_to_dtype_enum::value_type; } bool is_ref() const { return slice; } inline D* default_obj_ptr() { return &(a(s->shape[0])); } inline D& default_obj() { return a(s->shape[0]); } inline const D& default_obj() const { return a(s->shape[0]); } inline const D& const_default_obj() const { return a(s->shape[0]); } /* * Return a Ruby VALUE representation of default_obj() */ VALUE const_default_value() const { return nm::yale_storage::nm_rb_dereference(a(s->shape[0])); } inline size_t* ija_p() const { return reinterpret_cast(s->ija); } inline const size_t& ija(size_t p) const { return ija_p()[p]; } inline size_t& ija(size_t p) { return ija_p()[p]; } inline D* a_p() const { return reinterpret_cast(s->a); } inline const D& a(size_t p) const { return a_p()[p]; } inline D& a(size_t p) { return a_p()[p]; } bool real_row_empty(size_t i) const { return ija(i+1) - ija(i) == 0 ? 
true : false; } inline size_t* shape_p() const { return slice_shape; } inline size_t shape(uint8_t d) const { return slice_shape[d]; } inline size_t* real_shape_p() const { return s->shape; } inline size_t real_shape(uint8_t d) const { return s->shape[d]; } inline size_t* offset_p() const { return slice_offset; } inline size_t offset(uint8_t d) const { return slice_offset[d]; } inline size_t capacity() const { return s->capacity; } inline size_t size() const { return ija(real_shape(0)); } /* * Returns true if the value at apos is the default value. * Mainly used for determining if the diagonal contains zeros. */ bool is_pos_default_value(size_t apos) const { return (a(apos) == const_default_obj()); } /* * Given a size-2 array of size_t, representing the shape, determine * the maximum size of YaleStorage arrays. */ static size_t max_size(const size_t* shape) { size_t result = shape[0] * shape[1] + 1; if (shape[0] > shape[1]) result += shape[0] - shape[1]; return result; } /* * Minimum size of Yale Storage arrays given some shape. */ static size_t min_size(const size_t* shape) { return shape[0]*2 + 1; } /* * This is the guaranteed maximum size of the IJA/A arrays of the matrix given its shape. */ inline size_t real_max_size() const { return YaleStorage::max_size(real_shape_p()); } // Binary search between left and right in IJA for column ID real_j. Returns left if not found. size_t real_find_pos(size_t left, size_t right, size_t real_j, bool& found) const { if (left > right) { found = false; return left; } size_t mid = (left + right) / 2; size_t mid_j = ija(mid); if (mid_j == real_j) { found = true; return mid; } else if (mid_j > real_j) return real_find_pos(left, mid - 1, real_j, found); else return real_find_pos(mid + 1, right, real_j, found); } // Binary search between left and right in IJA for column ID real_j. Essentially finds where the slice should begin, // with no guarantee that there's anything in there. 
size_t real_find_left_boundary_pos(size_t left, size_t right, size_t real_j) const {
  if (left > right)        return right;  // empty range: hand back `right` -- callers rely on this.
  if (ija(left) >= real_j) return left;   // whole range is already at/after real_j

  size_t mid   = (left + right) / 2;
  size_t mid_j = ija(mid);

  if (mid_j == real_j)     return mid;
  else if (mid_j > real_j) return real_find_left_boundary_pos(left, mid, real_j);
  else                     return real_find_left_boundary_pos(mid + 1, right, real_j);
}

// Binary search between left and right in IJA for column ID real_j. Essentially finds where the slice should end,
// with no guarantee that there's anything in there.
size_t real_find_right_boundary_pos(size_t left, size_t right, size_t real_j) const {
  if (left > right)         return right;
  if (ija(right) <= real_j) return right;  // whole range is already at/before real_j

  size_t mid   = (left + right) / 2;
  size_t mid_j = ija(mid);

  if (mid_j == real_j)      return mid;
  else if (mid_j > real_j)  return real_find_right_boundary_pos(left, mid, real_j);
  else                      return real_find_right_boundary_pos(mid + 1, right, real_j);
}

// Binary search for coordinates i,j in the slice. Returns the IJA position plus a
// flag saying whether the entry was actually found there.
// NOTE(review): std::pair's template arguments were stripped by the text
// extraction; from usage this is std::pair<size_t,bool> (argument:
// std::pair<size_t,size_t>) -- restore from the upstream file.
std::pair find_pos(const std::pair& ij) const {
  size_t left  = ija(ij.first + offset(0));          // first ND slot of the real row
  size_t right = ija(ij.first + offset(0) + 1) - 1;  // last ND slot of the real row

  std::pair result;
  result.first = real_find_pos(left, right, ij.second + offset(1), result.second);
  return result;
}

// Binary search for coordinates i,j in the slice, and return the first position >= j in row i.
size_t find_pos_for_insertion(size_t i, size_t j) const {
  size_t left  = ija(i + offset(0));
  size_t right = ija(i + offset(0) + 1) - 1;

  // Check that the right search point is valid. rflbp will check to make sure the left is valid relative to left.
  if (right > ija(real_shape(0))) {
    right = ija(real_shape(0))-1;
  }
  size_t result = real_find_left_boundary_pos(left, right, j + offset(1));
  return result;
}

// Iterator typedefs.
// NOTE(review): every template-argument list below was stripped by the text
// extraction (e.g. `basic_iterator_T >` is missing its `<...>` arguments);
// these lines will not compile as-is and must be restored from upstream.
typedef yale_storage::basic_iterator_T > basic_iterator;
typedef yale_storage::basic_iterator_T > const_basic_iterator;

typedef yale_storage::stored_diagonal_iterator_T > stored_diagonal_iterator;
typedef yale_storage::stored_diagonal_iterator_T > const_stored_diagonal_iterator;

typedef yale_storage::iterator_T > iterator;
typedef yale_storage::iterator_T > const_iterator;

friend class yale_storage::row_iterator_T >;
typedef yale_storage::row_iterator_T > row_iterator;
typedef yale_storage::row_iterator_T > const_row_iterator;

typedef yale_storage::row_stored_iterator_T,row_iterator> row_stored_iterator;
typedef yale_storage::row_stored_nd_iterator_T,row_iterator> row_stored_nd_iterator;
typedef yale_storage::row_stored_iterator_T,const_row_iterator> const_row_stored_iterator;
typedef yale_storage::row_stored_nd_iterator_T,const_row_iterator> const_row_stored_nd_iterator;
typedef std::pair row_nd_iter_pair;

// Variety of iterator begin and end functions.
// --- Iterator factories ----------------------------------------------------
// begin()/end() (and const cbegin()/cend()) traverse the slice as if dense;
// sdbegin()/sdend() traverse only the stored diagonal; ribegin()/riend()
// traverse row by row.
iterator begin(size_t row = 0)  { return iterator(*this, row); }
iterator row_end(size_t row)    { return begin(row+1); }
iterator end()                  { return iterator(*this, shape(0)); }
const_iterator cbegin(size_t row = 0) const { return const_iterator(*this, row); }
const_iterator crow_end(size_t row) const   { return cbegin(row+1); }
const_iterator cend() const                 { return const_iterator(*this, shape(0)); }

// The stored-diagonal range ends where either dimension of the slice runs out
// (computed in real coordinates, shifted back by the larger offset).
stored_diagonal_iterator sdbegin(size_t d = 0) { return stored_diagonal_iterator(*this, d); }
stored_diagonal_iterator sdend() {
  return stored_diagonal_iterator(*this,
                                  std::min( shape(0) + offset(0), shape(1) + offset(1) )
                                  - std::max(offset(0), offset(1)) );
}
const_stored_diagonal_iterator csdbegin(size_t d = 0) const { return const_stored_diagonal_iterator(*this, d); }
const_stored_diagonal_iterator csdend() const {
  return const_stored_diagonal_iterator(*this,
                                        std::min( shape(0) + offset(0), shape(1) + offset(1) )
                                        - std::max(offset(0), offset(1)) );
}
row_iterator ribegin(size_t row = 0) { return row_iterator(*this, row); }
row_iterator riend()                 { return row_iterator(*this, shape(0)); }
const_row_iterator cribegin(size_t row = 0) const { return const_row_iterator(*this, row); }
const_row_iterator criend() const                 { return const_row_iterator(*this, shape(0)); }

/*
 * Get a count of the ndnz in the slice as if it were its own matrix: stored
 * entries off the slice's diagonal whose value differs from the default.
 */
size_t count_copy_ndnz() const {
  if (!slice) return s->ndnz; // easy way -- not a slice.

  size_t count = 0;

  // Visit all stored entries.
  for (const_row_iterator it = cribegin(); it != criend(); ++it){
    for (auto jt = it.begin(); jt != it.end(); ++jt) {
      if (it.i() != jt.j() && *jt != const_default_obj()) ++count;
    }
  }

  return count;
}

/*
 * Returns the iterator for i,j or snd_end() if not found.
 */
/* stored_nondiagonal_iterator find(const std::pair& ij) {
  std::pair find_pos_result = find_pos(ij);
  if (!find_pos_result.second) return sndend();
  else return stored_nondiagonal_iterator(*this, ij.first, find_pos_result.first);
} */

/*
 * Returns a stored_nondiagonal_iterator pointing to the location where some coords i,j should go, or returns their
 * location if present.
 */
/*std::pair lower_bound(const std::pair& ij) {
  row_iterator it = ribegin(ij.first);
  row_stored_nd_iterator jt = it.lower_bound(ij.second);
  return std::make_pair(it,jt);
} */

/*
 * Bookkeeping for a multi-row slice assignment: for each row of the slice,
 * the IJA position where insertion starts and the net change in that row's
 * stored-entry count.
 * NOTE(review): the std::vector element types below were stripped by the
 * text extraction; restore from the upstream file.
 */
class multi_row_insertion_plan {
public:
  std::vector pos;      // per-row IJA insertion position
  std::vector change;   // per-row net change in stored entries
  int total_change;     // the net change occurring
  size_t num_changes;   // the total number of rows that need to change size

  multi_row_insertion_plan(size_t rows_in_slice) : pos(rows_in_slice), change(rows_in_slice), total_change(0), num_changes(0) { }

  // Record the (change, position) pair reported for row i of the slice.
  void add(size_t i, const std::pair& change_and_pos) {
    pos[i]    = change_and_pos.second;
    change[i] = change_and_pos.first;
    total_change += change_and_pos.first;
    if (change_and_pos.first != 0) num_changes++;
  }
};

/*
 * Find all the information we need in order to modify multiple rows.
 */
multi_row_insertion_plan insertion_plan(row_iterator i, size_t j, size_t* lengths, D* const v, size_t v_size) const {
  multi_row_insertion_plan p(lengths[0]);

  // v_offset is our offset in the array v. If the user wants to change two elements in each of three rows,
  // but passes an array of size 3, we need to know that the second insertion plan must start at position
  // 2 instead of 0; and then the third must start at 1.
  size_t v_offset = 0;
  for (size_t m = 0; m < lengths[0]; ++m, ++i) {
    p.add(m, i.single_row_insertion_plan(j, lengths[1], v, v_size, v_offset));
  }

  return p;
}

/*
 * Insert entries in multiple rows. Slice-setting.
 */
void insert(row_iterator i, size_t j, size_t* lengths, D* const v, size_t v_size) {
  // Expensive pre-processing step: find all the information we need in order to do insertions.
  multi_row_insertion_plan p = insertion_plan(i, j, lengths, v, v_size);

  // There are more efficient ways to do this, but this is the low hanging fruit version of the algorithm.
  // Here's the full problem: http://stackoverflow.com/questions/18753375/algorithm-for-merging-short-lists-into-a-long-vector
  //   --JW

  bool resize = false;
  size_t sz = size();
  if (p.num_changes > 1) resize = true; // TODO: There are surely better ways to do this, but I've gone for the low-hanging fruit
  else if (sz + p.total_change > capacity() || sz + p.total_change <= capacity() / nm::yale_storage::GROWTH_CONSTANT) resize = true;

  if (resize) {
    update_resize_move_insert(i.i() + offset(0), j + offset(1), lengths, v, v_size, p);
  } else {
    // Make the necessary modifications, which hopefully can be done in-place.
    size_t v_offset = 0;
    //int accum       = 0;
    for (size_t ii = 0; ii < lengths[0]; ++ii, ++i) {
      i.insert(row_stored_nd_iterator(i, p.pos[ii]), j, lengths[1], v, v_size, v_offset);
    }
  }
}

/*
 * Most Ruby-centric insert function. Accepts coordinate information in slice,
 * and value information of various types in +right+. This function must evaluate
 * +right+ and determine what other functions to call in order to properly handle
 * it.
 */
void insert(SLICE* slice, VALUE right) {
  NM_CONSERVATIVE(nm_register_value(&right));

  // If +right+ is a dense NMatrix of our dtype, borrow its elements directly.
  // NOTE(review): template arguments of std::pair / the reinterpret_casts
  // below were stripped by the text extraction; restore from upstream.
  std::pair nm_and_free = interpret_arg_as_dense_nmatrix(right, dtype());

  // Map the data onto D* v
  D*     v;
  size_t v_size = 1;

  if (nm_and_free.first) {
    // Dense NMatrix source: use its element buffer in place.
    DENSE_STORAGE* s = reinterpret_cast(nm_and_free.first->storage);
    v       = reinterpret_cast(s->elements);
    v_size  = nm_storage_count_max_elements(s);

  } else if (RB_TYPE_P(right, T_ARRAY)) {
    // Ruby Array source: convert each element to dtype D into a fresh buffer.
    v_size = RARRAY_LEN(right);
    v      = NM_ALLOC_N(D, v_size);
    if (dtype() == nm::RUBYOBJ) {
      nm_register_values(reinterpret_cast(v), v_size);
    }
    for (size_t m = 0; m < v_size; ++m) {
      rubyval_to_cval(rb_ary_entry(right, m), s->dtype, &(v[m]));
    }
    if (dtype() == nm::RUBYOBJ) {
      nm_unregister_values(reinterpret_cast(v), v_size);
    }

  } else {
    // A single Ruby value.
    v = reinterpret_cast(rubyobj_to_cval(right, dtype()));
  }

  row_iterator i = ribegin(slice->coords[0]);

  if (slice->single || (slice->lengths[0] == 1 && slice->lengths[1] == 1)) { // single entry
    i.insert(slice->coords[1], *v);
  } else if (slice->lengths[0] == 1) { // single row, multiple entries
    i.insert(slice->coords[1], slice->lengths[1], v, v_size);
  } else { // multiple rows, unknown number of entries
    insert(i, slice->coords[1], slice->lengths, v, v_size);
  }

  // Only free v if it was allocated in this function.
  if (nm_and_free.first) {
    if (nm_and_free.second) {
      nm_delete(nm_and_free.first);
    }
  } else NM_FREE(v);

  NM_CONSERVATIVE(nm_unregister_value(&right));
}

/*
 * Remove an entry from an already found non-diagonal position.
 */
row_iterator erase(row_iterator it, const row_stored_nd_iterator& position) {
  it.erase(position);
  return it;
}

/*
 * Remove an entry from the matrix at the already-located position. If diagonal, just sets to default; otherwise,
 * actually removes the entry.
 */
row_iterator erase(row_iterator it, const row_stored_iterator& jt) {
  it.erase((const row_stored_nd_iterator&)jt);
  return it;
}

row_iterator insert(row_iterator it, row_stored_iterator position, size_t j, const D& val) {
  it.insert(position, j, val);
  return it;
}

/*
 * Insert an element in column j, using position's p() as the location to insert the new column. i and j will be the
 * coordinates. This also does a replace if column j is already present.
 *
 * Returns true if a new entry was added and false if an entry was replaced.
 *
 * Pre-conditions:
 *   - position.p() must be between ija(real_i) and ija(real_i+1), inclusive, where real_i = i + offset(0)
 *   - real_i and real_j must not be equal
 */
row_iterator insert(row_iterator it, row_stored_nd_iterator position, size_t j, const D& val) {
  it.insert(position, j, val);
  return it;
}

/*
 * Insert n elements v in columns j, using position as a guide. i gives the starting row. If at any time a value in j
 * decreases,
 */
/*bool insert(stored_iterator position, size_t n, size_t i, size_t* j, DType* v) {
} */

/*
 * A pseudo-insert operation, since the diagonal portion of the A array is constant size.
 */
stored_diagonal_iterator insert(stored_diagonal_iterator position, const D& val) {
  *position = val;
  return position;
}

/* iterator insert(iterator position, size_t j, const D& val) {
  if (position.real_i() == position.real_j()) {
    s->a(position.real_i()) = val;
    return position;
  } else {
    row_iterator it = ribegin(position.i());
    row_stored_nd_iterator position = it.ndbegin(j);
    return insert(it, position, j, val);
  }
}*/

/*
 * Returns a pointer to the location of some entry in the matrix.
 *
 * This is needed for backwards compatibility. We don't really want anyone
 * to modify the contents of that pointer, because it might be the ZERO location.
 *
 * TODO: Change all storage_get functions to return a VALUE once we've put list and
 * dense in OO mode. ???
 */
inline D* get_single_p(SLICE* slice) {
  size_t real_i = offset(0) + slice->coords[0],
         real_j = offset(1) + slice->coords[1];

  // The diagonal is always stored.
  if (real_i == real_j) return &(a(real_i));

  // Empty row: nothing stored, hand back the default.
  if (ija(real_i) == ija(real_i+1)) return default_obj_ptr(); // zero pointer

  // binary search for a column's location
  // NOTE(review): std::pair's template args were stripped by the text
  // extraction (upstream: std::pair<size_t,bool>).
  std::pair p = find_pos(std::make_pair(slice->coords[0], slice->coords[1]));
  if (p.second) return &(a(p.first));
  // not found: return default
  return default_obj_ptr(); // zero pointer
}

/*
 * Allocate a reference pointing to s. Note that even if +this+ is a reference,
 * we can create a reference within it.
 *
 * Note: Make sure you NM_FREE() the result of this call. You can't just cast it
 * directly into a YaleStorage class.
 */
YALE_STORAGE* alloc_ref(SLICE* slice) {
  YALE_STORAGE* ns = NM_ALLOC( YALE_STORAGE );

  ns->dim    = s->dim;
  ns->offset = NM_ALLOC_N(size_t, ns->dim);
  ns->shape  = NM_ALLOC_N(size_t, ns->dim);

  for (size_t d = 0; d < ns->dim; ++d) {
    ns->offset[d] = slice->coords[d] + offset(d);
    ns->shape[d]  = slice->lengths[d];
  }

  ns->dtype = s->dtype;
  // Share the source's A and IJA arrays; bump its reference count.
  ns->a     = a_p();
  ns->ija   = ija_p();

  ns->src   = s;
  s->count++;

  ns->ndnz     = 0;
  ns->capacity = 0;

  return ns;
}

/*
 * Allocates and initializes the basic struct (but not IJA or A vectors).
 */
static YALE_STORAGE* alloc(size_t* shape, size_t dim = 2) {
  YALE_STORAGE* s = NM_ALLOC( YALE_STORAGE );

  s->ndnz   = 0;
  s->dtype  = dtype();
  s->shape  = shape;
  s->offset = NM_ALLOC_N(size_t, dim);
  for (size_t d = 0; d < dim; ++d)
    s->offset[d] = 0;
  s->dim    = dim;
  // Self-referential src: this matrix owns its own storage.
  // NOTE(review): the reinterpret_cast's target type was stripped by extraction.
  s->src    = reinterpret_cast(s);
  s->count  = 1;

  return s;
}

/*
 * Create basic storage of same dtype as YaleStorage. Allocates it,
 * reserves necessary space, but doesn't fill structure at all.
 */
 */
static YALE_STORAGE* create(size_t* shape, size_t reserve) {
  YALE_STORAGE* s = alloc( shape, 2 );
  size_t max_sz = YaleStorage::max_size(shape),
         min_sz = YaleStorage::min_size(shape);

  // Clamp the requested reservation to [min_size, max_size].
  if (reserve < min_sz) {
    s->capacity = min_sz;
  } else if (reserve > max_sz) {
    s->capacity = max_sz;
  } else {
    s->capacity = reserve;
  }

  s->ija = NM_ALLOC_N( size_t, s->capacity );
  s->a   = NM_ALLOC_N( D,      s->capacity );

  return s;
}

/*
 * Clear out the D portion of the A vector (clearing the diagonal and setting
 * the zero value).
 */
static void clear_diagonal_and_zero(YALE_STORAGE& s, D* init_val = NULL) {
  // NOTE(review): the reinterpret_cast's target type was stripped by extraction (upstream: D*).
  D* a = reinterpret_cast(s.a);

  // Clear out the diagonal + one extra entry
  if (init_val) {
    for (size_t i = 0; i <= s.shape[0]; ++i)
      a[i] = *init_val;
  } else {
    for (size_t i = 0; i <= s.shape[0]; ++i)
      a[i] = 0;
  }
}

/*
 * Empty the matrix by initializing the IJA vector and setting the diagonal to 0.
 *
 * Called when most YALE_STORAGE objects are created.
 *
 * Can't go inside of class YaleStorage because YaleStorage creation requires that
 * IJA already be initialized.
 */
static void init(YALE_STORAGE& s, D* init_val) {
  size_t IA_INIT = s.shape[0] + 1;
  // Every row pointer starts at the end of the IA region: all rows empty.
  for (size_t m = 0; m < IA_INIT; ++m) {
    s.ija[m] = IA_INIT;
  }

  clear_diagonal_and_zero(s, init_val);
}

/*
 * Make a very basic allocation. No structure or copy or anything. It'll be shaped like this
 * matrix.
 *
 * TODO: Combine this with ::create()'s ::alloc(). These are redundant.
 */
// NOTE(review): every `template` keyword below lost its parameter list to the
// text extraction (upstream: template <typename E> etc.), as did casts and
// dependent names such as nm::ctype_to_dtype_enum<E>::value_type. Restore
// from the upstream file before compiling.
template YALE_STORAGE* alloc_basic_copy(size_t new_capacity, size_t new_ndnz) const {
  nm::dtype_t new_dtype = nm::ctype_to_dtype_enum::value_type;
  YALE_STORAGE* lhs     = NM_ALLOC( YALE_STORAGE );

  lhs->dim        = s->dim;
  lhs->shape      = NM_ALLOC_N( size_t, lhs->dim );
  lhs->shape[0]   = shape(0);
  lhs->shape[1]   = shape(1);
  lhs->offset     = NM_ALLOC_N( size_t, lhs->dim );
  lhs->offset[0]  = 0;
  lhs->offset[1]  = 0;

  lhs->capacity   = new_capacity;
  lhs->dtype      = new_dtype;
  lhs->ndnz       = new_ndnz;

  lhs->ija        = NM_ALLOC_N( size_t, new_capacity );
  lhs->a          = NM_ALLOC_N( E,      new_capacity );
  lhs->src        = lhs;   // copy owns its own storage
  lhs->count      = 1;

  return lhs;
}

/*
 * Make a full matrix structure copy (entries remain uninitialized). Remember to NM_FREE()!
 */
template YALE_STORAGE* alloc_struct_copy(size_t new_capacity) const {
  YALE_STORAGE* lhs = alloc_basic_copy(new_capacity, count_copy_ndnz());

  // Now copy the IJA contents
  if (slice) {
    rb_raise(rb_eNotImpError, "cannot copy struct due to different offsets");
  } else {
    for (size_t m = 0; m < size(); ++m) {
      lhs->ija[m] = ija(m); // copy indices
    }
  }
  return lhs;
}

/*
 * Copy this slice (or the full matrix if it isn't a slice) into a new matrix which is already allocated, ns.
 */
template void copy(YALE_STORAGE& ns) const {
  //nm::dtype_t new_dtype = nm::ctype_to_dtype_enum::value_type;
  // get the default value for initialization (we'll re-use val for other copies after this)
  E val = static_cast(const_default_obj());

  // initialize the matrix structure and clear the diagonal so we don't have to
  // keep track of unwritten entries.
  YaleStorage::init(ns, &val);

  E* ns_a   = reinterpret_cast(ns.a);
  size_t sz = shape(0) + 1; // current used size of ns
  nm_yale_storage_register(&ns);

  // FIXME: If diagonals line up, it's probably faster to do this with stored diagonal and stored non-diagonal iterators
  for (const_row_iterator it = cribegin(); it != criend(); ++it) {
    for (auto jt = it.begin(); !jt.end(); ++jt) {
      if (it.i() == jt.j()) {
        // entry lands on the copy's diagonal
        if (Yield)  ns_a[it.i()] = rb_yield(~jt);
        else        ns_a[it.i()] = static_cast(*jt);
      } else if (*jt != const_default_obj()) {
        // non-diagonal, non-default: append to ND region
        if (Yield)  ns_a[sz] = rb_yield(~jt);
        else        ns_a[sz] = static_cast(*jt);
        ns.ija[sz] = jt.j();
        ++sz;
      }
    }
    ns.ija[it.i()+1] = sz; // close out this row in IJA
  }
  nm_yale_storage_unregister(&ns);

  //ns.ija[shape(0)] = sz;       // indicate end of last row
  ns.ndnz = sz - shape(0) - 1; // update ndnz count
}

/*
 * Allocate a casted copy of this matrix/reference. Remember to NM_FREE() the result!
 *
 * If Yield is true, E must be nm::RubyObject, and it will call an rb_yield upon the stored value.
 */
template YALE_STORAGE* alloc_copy() const {
  //nm::dtype_t new_dtype = nm::ctype_to_dtype_enum::value_type;

  YALE_STORAGE* lhs;
  if (slice) {
    size_t* xshape = NM_ALLOC_N(size_t, 2);
    xshape[0]      = shape(0);
    xshape[1]      = shape(1);
    size_t ndnz    = count_copy_ndnz();
    size_t reserve = shape(0) + ndnz + 1;

    //      std::cerr << "reserve = " << reserve << std::endl;

    lhs            = YaleStorage::create(xshape, reserve);

    // FIXME: This should probably be a throw which gets caught outside of the object.
    if (lhs->capacity < reserve)
      rb_raise(nm_eStorageTypeError, "conversion failed; capacity of %lu requested, max allowable is %lu", reserve, lhs->capacity);

    // Fill lhs with what's in our current matrix.
    copy(*lhs);
  } else {
    // Copy the structure and setup the IJA structure.
    lhs = alloc_struct_copy(s->capacity);

    E* la = reinterpret_cast(lhs->a);

    nm_yale_storage_register(lhs);
    for (size_t m = 0; m < size(); ++m) {
      if (Yield) {
        la[m] = rb_yield(nm::yale_storage::nm_rb_dereference(a(m)));
      }
      else       la[m] = static_cast(a(m));
    }
    nm_yale_storage_unregister(lhs);
  }

  return lhs;
}

/*
 * Allocate a transposed copy of the matrix
 */
/*
 * Allocate a casted copy of this matrix/reference. Remember to NM_FREE() the result!
 *
 * If Yield is true, E must be nm::RubyObject, and it will call an rb_yield upon the stored value.
 */
template YALE_STORAGE* alloc_copy_transposed() const {
  if (slice) {
    rb_raise(rb_eNotImpError, "please make a copy before transposing");
  } else {
    // Copy the structure and setup the IJA structure.
    size_t* xshape = NM_ALLOC_N(size_t, 2);
    xshape[0]      = shape(1);
    xshape[1]      = shape(0);

    // Take a stab at the number of non-diagonal stored entries we'll have.
    size_t reserve    = size() - xshape[1] + xshape[0];
    YALE_STORAGE* lhs = YaleStorage::create(xshape, reserve);
    E r_init          = static_cast(const_default_obj());
    YaleStorage::init(*lhs, &r_init);

    nm::yale_storage::transpose_yale(shape(0), shape(1), ija_p(), ija_p(), a_p(), const_default_obj(),
                                     lhs->ija, lhs->ija, reinterpret_cast(lhs->a), r_init);
    return lhs;
  }
  return NULL;
}

/*
 * Comparison between two matrices. Does not check size and such -- assumption is that they are the same shape.
 */
template bool operator==(const YaleStorage& rhs) const {
  for (size_t i = 0; i < shape(0); ++i) {
    typename YaleStorage::const_row_iterator li = cribegin(i);
    typename YaleStorage::const_row_iterator ri = rhs.cribegin(i);

    size_t j = 0; // keep track of j so we can compare different defaults

    auto lj = li.begin();
    auto rj = ri.begin();
    while (!lj.end() || !rj.end()) {
      if (lj < rj) {
        // left stores an entry where right doesn't: compare against right's default
        if (*lj != rhs.const_default_obj()) return false;
        ++lj;
      } else if (rj < lj) {
        if (const_default_obj() != *rj) return false;
        ++rj;
      } else { // rj == lj
        if (*lj != *rj) return false;
        ++lj;
        ++rj;
      }
      ++j;
    }

    // if we skip an entry (because it's an ndnz in BOTH matrices), we need to compare defaults.
    // (We know we skipped if lj and rj hit end before j does.)
    if (j < shape(1) && const_default_obj() != rhs.const_default_obj()) return false;

    ++li;
    ++ri;
  }

  return true;
}

/*
 * Necessary for element-wise operations. The return dtype will be nm::RUBYOBJ.
 */
template VALUE map_merged_stored(VALUE klass, nm::YaleStorage& t, VALUE r_init) const {
  nm_register_value(&r_init);
  VALUE s_init = const_default_value(),
        t_init = t.const_default_value();
  nm_register_value(&s_init);
  nm_register_value(&t_init);

  // Make a reasonable approximation of the resulting capacity
  size_t s_ndnz  = count_copy_ndnz(),
         t_ndnz  = t.count_copy_ndnz();
  size_t reserve = shape(0) + std::max(s_ndnz, t_ndnz) + 1;

  size_t* xshape = NM_ALLOC_N(size_t, 2);
  xshape[0]      = shape(0);
  xshape[1]      = shape(1);

  YALE_STORAGE* rs= YaleStorage::create(xshape, reserve);

  // No explicit default given: derive one by yielding both matrices' defaults.
  if (r_init == Qnil) {
    nm_unregister_value(&r_init);
    r_init = rb_yield_values(2, s_init, t_init);
    nm_register_value(&r_init);
  }

  nm::RubyObject r_init_obj(r_init);

  // Prepare the matrix structure
  YaleStorage::init(*rs, &r_init_obj);
  NMATRIX* m   = nm_create(nm::YALE_STORE, reinterpret_cast(rs));
  nm_register_nmatrix(m);
  VALUE result = Data_Wrap_Struct(klass, nm_mark, nm_delete, m);
  nm_unregister_nmatrix(m);
  nm_register_value(&result);
  nm_unregister_value(&r_init);

  RETURN_SIZED_ENUMERATOR_PRE
  nm_unregister_value(&result);
  nm_unregister_value(&t_init);
  nm_unregister_value(&s_init);
  // No obvious, efficient way to pass a length function as the fourth argument here:
  RETURN_SIZED_ENUMERATOR(result, 0, 0, 0);

  // Create an object for us to iterate over.
  // NOTE(review): template args stripped by extraction (upstream: YaleStorage<nm::RubyObject>).
  YaleStorage r(rs);

  // Walk down our new matrix, inserting values as we go.
  for (size_t i = 0; i < xshape[0]; ++i) {
    YaleStorage::row_iterator ri = r.ribegin(i);
    typename YaleStorage::const_row_iterator si = cribegin(i);
    typename YaleStorage::const_row_iterator ti = t.cribegin(i);

    auto sj = si.begin();
    auto tj = ti.begin();
    auto rj = ri.ndbegin();

    while (sj != si.end() || tj != ti.end()) {
      VALUE  v;
      size_t j;

      if (sj < tj) {        // only the left matrix stores an entry here
        v = rb_yield_values(2, ~sj, t_init);
        j = sj.j();
        ++sj;
      } else if (tj < sj) { // only the right matrix stores an entry here
        v = rb_yield_values(2, s_init, ~tj);
        j = tj.j();
        ++tj;
      } else {              // both store an entry
        v = rb_yield_values(2, ~sj, ~tj);
        j = sj.j();
        ++sj;
        ++tj;
      }

      // FIXME: This can be sped up by inserting all at the same time
      // since it's a new matrix. But that function isn't quite ready
      // yet.
      if (j == i) r.a(i) = v;
      else        rj = ri.insert(rj, j, v);
      //RB_P(rb_funcall(result, rb_intern("yale_ija"), 0));
    }
  }

  nm_unregister_value(&result);
  nm_unregister_value(&t_init);
  nm_unregister_value(&s_init);

  return result;
}

protected:

/*
 * Update row sizes starting with row i
 */
void update_real_row_sizes_from(size_t real_i, int change) {
  ++real_i;
  for (; real_i <= real_shape(0); ++real_i) {
    ija(real_i) += change;
  }
}

/*
 * Like move_right, but also involving a resize. This updates row sizes as well. This version also takes a plan for
 * multiple rows, and tries to do them all in one copy. It's used for multi-row slice-setting.
 *
 * This also differs from update_resize_move in that it resizes to the exact requested size instead of reserving space.
 */
void update_resize_move_insert(size_t real_i, size_t real_j, size_t* lengths, D* const v, size_t v_size, multi_row_insertion_plan p) {
  size_t sz      = size(); // current size of the storage vectors
  size_t new_cap = sz + p.total_change;
  if (new_cap > real_max_size()) {
    NM_FREE(v);
    rb_raise(rb_eStandardError, "resize caused by insertion of size %d (on top of current size %lu) would have caused yale matrix size to exceed its maximum (%lu)", p.total_change, sz, real_max_size());
  }

  if (s->dtype == nm::RUBYOBJ) {
    nm_register_values(reinterpret_cast(v), v_size);
  }

  size_t* new_ija = NM_ALLOC_N( size_t,new_cap );
  D* new_a        = NM_ALLOC_N( D,     new_cap );

  // Copy unchanged row pointers first.
  size_t m = 0;
  for (; m <= real_i; ++m) {
    new_ija[m] = ija(m);
    new_a[m]   = a(m);
  }

  // Now copy unchanged locations in IJA and A.
  size_t q = real_shape(0)+1; // q is the copy-to position.
  size_t r = real_shape(0)+1; // r is the copy-from position.
  for (; r < p.pos[0]; ++r, ++q) {
    new_ija[q] = ija(r);
    new_a[q]   = a(r);
  }

  // For each pos and change in the slice, copy the information prior to the insertion point. Then insert the necessary
  // information.
  size_t v_offset = 0;
  int accum = 0; // keep track of the total change as we go so we can update row information.
  for (size_t i = 0; i < lengths[0]; ++i, ++m) {
    for (; r < p.pos[i]; ++r, ++q) {
      new_ija[q] = ija(r);
      new_a[q]   = a(r);
    }

    // Insert slice data for a single row.
    for (size_t j = 0; j < lengths[1]; ++j, ++v_offset) {
      if (v_offset >= v_size) v_offset %= v_size; // wrap around the value array

      if (j + real_j == i + real_i) { // modify diagonal
        new_a[real_i + i] = v[v_offset];
      } else if (v[v_offset] != const_default_obj()) {
        new_ija[q] = j + real_j;
        new_a[q]   = v[v_offset];
        ++q; // move on to next q location
      }

      if (r < ija(real_shape(0)) && ija(r) == j + real_j) ++r; // move r forward if the column matches.
    }

    // Update the row pointer for the current row.
    accum     += p.change[i];
    new_ija[m] = ija(m) + accum;
    new_a[m]   = a(m); // copy diagonal for this row
  }

  // Now copy everything subsequent to the last insertion point.
  for (; r < size(); ++r, ++q) {
    new_ija[q] = ija(r);
    new_a[q]   = a(r);
  }

  // Update the remaining row pointers and copy remaining diagonals
  for (; m <= real_shape(0); ++m) {
    new_ija[m] = ija(m) + accum;
    new_a[m]   = a(m);
  }

  s->capacity = new_cap;

  NM_FREE(s->ija);
  NM_FREE(s->a);

  if (s->dtype == nm::RUBYOBJ) {
    nm_unregister_values(reinterpret_cast(v), v_size);
  }

  s->ija = new_ija;
  s->a   = reinterpret_cast(new_a);
}

/*
 * Like move_right, but also involving a resize. This updates row sizes as well.
 */
void update_resize_move(row_stored_nd_iterator position, size_t real_i, int n) {
  size_t sz      = size(); // current size of the storage vectors
  // Grow geometrically on insertion, shrink on deletion.
  size_t new_cap = n > 0 ? capacity() * nm::yale_storage::GROWTH_CONSTANT
                         : capacity() / nm::yale_storage::GROWTH_CONSTANT;
  size_t max_cap = real_max_size();

  if (new_cap > max_cap) {
    new_cap = max_cap;
    if (sz + n > max_cap)
      rb_raise(rb_eStandardError, "resize caused by insertion/deletion of size %d (on top of current size %lu) would have caused yale matrix size to exceed its maximum (%lu)", n, sz, real_max_size());
  }

  if (new_cap < sz + n) new_cap = sz + n;

  size_t* new_ija = NM_ALLOC_N( size_t,new_cap );
  D* new_a        = NM_ALLOC_N( D,     new_cap );

  // Copy unchanged row pointers first.
  for (size_t m = 0; m <= real_i; ++m) {
    new_ija[m] = ija(m);
    new_a[m]   = a(m);
  }

  // Now update row pointers following the changed row as we copy the additional values.
  for (size_t m = real_i + 1; m <= real_shape(0); ++m) {
    new_ija[m] = ija(m) + n;
    new_a[m]   = a(m);
  }

  // Copy all remaining prior to insertion/removal site
  for (size_t m = real_shape(0) + 1; m < position.p(); ++m) {
    new_ija[m] = ija(m);
    new_a[m]   = a(m);
  }

  // Copy all subsequent to insertion/removal site
  size_t m = position.p();
  if (n < 0) m -= n; // deletion: n is negative, so this skips the removed entries

  for (; m < sz; ++m) {
    new_ija[m+n] = ija(m);
    new_a[m+n]   = a(m);
  }

  if (s->dtype == nm::RUBYOBJ) {
    nm_yale_storage_register_a(new_a, new_cap);
  }

  s->capacity = new_cap;

  NM_FREE(s->ija);
  NM_FREE(s->a);

  if (s->dtype == nm::RUBYOBJ) {
    nm_yale_storage_unregister_a(new_a, new_cap);
  }

  s->ija = new_ija;
  s->a   = reinterpret_cast(new_a);
}

/*
 * Move elements in the IJA and A arrays by n (to the right).
 * Does not update row sizes.
 */
void move_right(row_stored_nd_iterator position, size_t n) {
  size_t sz = size();
  // Work from the end backwards so nothing is overwritten before it is copied.
  for (size_t m = 0; m < sz - position.p(); ++m) {
    ija(sz+n-1-m) = ija(sz-1-m);
    a(sz+n-1-m)   = a(sz-1-m);
  }
}

/*
 * Move elements in the IJA and A arrays by n (to the left). Here position gives
 * the location to move to, and they should come from n to the right.
 */
void move_left(row_stored_nd_iterator position, size_t n) {
  size_t sz = size();
  for (size_t m = position.p() + n; m < sz; ++m) { // work backwards
    ija(m-n) = ija(m);
    a(m-n)   = a(m);
  }
}

YALE_STORAGE* s;        // underlying C yale storage struct
bool     slice;         // true when this object views a slice of s rather than all of it
size_t*  slice_shape;   // shape of the slice
size_t*  slice_offset;  // offset of the slice within s
};

} // end of nm namespace

#endif // YALE_CLASS_H


================================================ FILE: ext/nmatrix/storage/yale/iterators/base.h ================================================


/////////////////////////////////////////////////////////////////////
// = NMatrix
//
// A linear algebra library for scientific computation in Ruby.
// NMatrix is part of SciRuby.
//
// NMatrix was originally inspired by and derived from NArray, by
// Masahiro Tanaka: http://narray.rubyforge.org
//
// == Copyright Information
//
// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
//
// Please see LICENSE.txt for additional copyright notices.
//
// == Contributing
//
// By contributing source code to SciRuby, you agree to be bound by
// our Contributor Agreement:
//
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
//
// == base.h
//
// Yale storage pure virtual basic_iterator class.
//

#ifndef YALE_ITERATORS_BASE_H
# define YALE_ITERATORS_BASE_H

// NOTE(review): the four #include targets below were stripped by the text
// extraction (angle-bracketed header names removed); restore from upstream.
#include #include #include #include

namespace nm {

// NOTE(review): template parameter/argument lists throughout this file were
// stripped by the text extraction (this forward declaration, the SFINAE
// default arguments, the specialization below, etc.); the code will not
// compile until they are restored from the upstream file.
template class YaleStorage;

namespace yale_storage {

// Convert a stored value to its Ruby VALUE representation.
template VALUE nm_rb_dereference(D const& v) {
  return nm::RubyObject(v).rval;
}

// RubyObject specialization: it already holds a VALUE.
template <> VALUE nm_rb_dereference(nm::RubyObject const& v) {
  return v.rval;
}

/*
 * Iterator base class (pure virtual).
 */
template ::value, const nm::YaleStorage, nm::YaleStorage >::type>
class basic_iterator_T {

protected:
  YaleRef& y;  // the (possibly const) matrix being traversed
  size_t i_;   // current row, relative to the slice
  size_t p_;   // current position in the IJA/A arrays

public:
  size_t offset(size_t d) const { return y.offset(d); }
  size_t shape(size_t d) const { return y.shape(d); }
  size_t real_shape(size_t d) const { return y.real_shape(d); }

  // Position in an equivalent dense, row-major traversal of the slice.
  size_t dense_location() const {
    return i()*shape(1) + j();
  }

  template ::value, const size_t, size_t>::type>
  T& ija(size_t pp) const { return y.ija(pp); }

  template ::value, const size_t, size_t>::type>
  T& ija(size_t pp) { return y.ija(pp); }

  // p_ in the leading region of A means a diagonal entry; past it, non-diagonal.
  virtual bool diag() const {
    return p_ < std::min(y.real_shape(0), y.real_shape(1));
  }
  virtual bool done_with_diag() const {
    return p_ == std::min(y.real_shape(0), y.real_shape(1));
  }
  virtual bool nondiag() const {
    return p_ > std::min(y.real_shape(0), y.real_shape(1));
  }

  basic_iterator_T(YaleRef& obj, size_t ii = 0, size_t pp = 0) : y(obj), i_(ii), p_(pp) { }

  basic_iterator_T& operator=(const basic_iterator_T& rhs) {
    if (&y != &(rhs.y)) throw std::logic_error("can only be used on iterators with the same matrix");
    i_ = rhs.i_;
    p_ = rhs.p_;
    return *this;
  }

  virtual inline size_t i() const { return i_; }
  virtual size_t j() const = 0;

  virtual inline VALUE rb_i() const { return LONG2NUM(i()); }
  virtual inline VALUE rb_j() const { return LONG2NUM(j()); }

  // Coordinates in the underlying (unsliced) matrix.
  virtual size_t real_i() const { return offset(0) + i(); }
  virtual size_t real_j() const { return offset(1) + j(); }
  virtual size_t p() const { return p_; }

  // True when a non-diagonal entry is actually stored at the current position.
  virtual bool real_ndnz_exists() const { return !y.real_row_empty(real_i()) && ija(p_) == real_j(); }

  virtual RefType& operator*() = 0;
  virtual RefType& operator*() const = 0;

  // Ruby VALUE de-reference
  inline VALUE operator~() const {
    return nm_rb_dereference(**this);
  //virtual VALUE operator~() const {
  //  if (typeid(D) == typeid(RubyObject)) return (**this); // FIXME: return rval instead, faster;
  //  else return RubyObject(*(*this)).rval;
  }

  virtual bool operator==(const std::pair& ij) {
    if (p() >= ija(real_shape(0))) return false;
    else return i() == ij.first && j() == ij.second;
  }

  virtual bool operator==(const basic_iterator_T& rhs) const {
    return i() == rhs.i() && j() == rhs.j();
  }

  virtual bool operator!=(const basic_iterator_T& rhs) const {
    return i() != rhs.i() || j() != rhs.j();
  }
};

} } // end of namespace nm::yale_storage

#endif // YALE_ITERATORS_BASE_H


================================================ FILE: ext/nmatrix/storage/yale/iterators/iterator.h ================================================


/////////////////////////////////////////////////////////////////////
// = NMatrix
//
// A linear algebra library for scientific computation in Ruby.
// NMatrix is part of SciRuby.
//
// NMatrix was originally inspired by and derived from NArray, by
// Masahiro Tanaka: http://narray.rubyforge.org
//
// == Copyright Information
//
// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
//
// Please see LICENSE.txt for additional copyright notices.
//
// == Contributing
//
// By contributing source code to SciRuby, you agree to be bound by
// our Contributor Agreement:
//
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
//
// == iterator.h
//
// Iterate over yale as if dense
//

#ifndef YALE_ITERATORS_ITERATOR_H
# define YALE_ITERATORS_ITERATOR_H

// NOTE(review): the #include targets below were stripped by the text
// extraction; restore from the upstream file.
#include #include #include

namespace nm { namespace yale_storage {

/*
 * Iterator for traversing matrix class as if it were dense (visits each entry in order).
 *
 * NOTE(review): template parameter/argument lists in this file were stripped
 * by the text extraction; restore them from upstream before compiling.
 */
template ::value, const nm::YaleStorage, nm::YaleStorage >::type>
class iterator_T : public basic_iterator_T {
  using basic_iterator_T::i_;
  using basic_iterator_T::p_;
  using basic_iterator_T::y;
  using basic_iterator_T::offset;
  using basic_iterator_T::shape;
  using basic_iterator_T::ija;

protected:
  size_t j_; // These are relative to the slice.

public:
  // Create an iterator. May select the row since this is O(1).
  iterator_T(YaleRef& obj, size_t ii = 0)
  : basic_iterator_T(obj, ii, obj.ija(ii + obj.offset(0))), j_(0)
  {
    // advance to the beginning of the row
    if (obj.offset(1) > 0)
      p_ = y.find_pos_for_insertion(i_,j_);
  }

  // Prefix ++: advance one dense position, keeping p_ in sync with the
  // stored entries of the current row.
  iterator_T& operator++() {
    size_t prev_j = j_++;
    if (j_ >= shape(1)) { // wrapped around to the next row
      j_ = 0;
      ++i_;

      // Do a binary search to find the beginning of the slice
      p_ = offset(0) > 0 ? y.find_pos_for_insertion(i_,j_) : ija(i_);
    } else {
      // If the last j was actually stored in this row of the matrix, need to advance p.
      if (!y.real_row_empty(i_ + offset(0)) && ija(p_) <= prev_j + offset(1)) ++p_; // this test is the same as real_ndnz_exists
    }

    return *this;
  }

  iterator_T operator++(int dummy) const {
    iterator_T iter(*this);
    return ++iter;
  }

  // Comparison is by dense position only.
  virtual bool operator!=(const iterator_T& rhs) const {
    return this->dense_location() != rhs.dense_location();
  }

  virtual bool operator==(const iterator_T& rhs) const {
    return this->dense_location() == rhs.dense_location();
  }

  bool operator<(const iterator_T& rhs) const {
    return this->dense_location() < rhs.dense_location();
  }

  bool operator>(const iterator_T& rhs) const {
    return this->dense_location() > rhs.dense_location();
  }

  virtual bool diag() const {
    return i_ + offset(0) == j_ + offset(1);
  }

  // De-reference: diagonal slot, stored non-diagonal slot, or the default object.
  RefType& operator*() {
    if (diag())                                                              return y.a( i_ + offset(0) );
    else if (p_ >= ija(i_+offset(0)+1))                                      return y.const_default_obj();
    else if (!y.real_row_empty(i_ + offset(0)) && ija(p_) == j_ + offset(1)) return y.a( p_ );
    else                                                                     return y.const_default_obj();
  }

  RefType& operator*() const {
    if (diag())                                                              return y.a( i_ + offset(0) );
    else if (p_ >= ija(i_+offset(0)+1))                                      return y.const_default_obj();
    else if (!y.real_row_empty(i_ + offset(0)) && ija(p_) == j_ + offset(1)) return y.a( p_ );
    else                                                                     return y.const_default_obj();
  }

  virtual size_t j() const { return j_; }
};

} } // end of namespace nm::yale_storage

#endif // YALE_ITERATORS_ITERATOR_H


================================================ FILE: ext/nmatrix/storage/yale/iterators/row.h ================================================


/////////////////////////////////////////////////////////////////////
// = NMatrix
//
// A linear algebra library for scientific computation in Ruby.
// NMatrix is part of SciRuby.
//
// NMatrix was originally inspired by and derived from NArray, by
// Masahiro Tanaka: http://narray.rubyforge.org
//
// == Copyright Information
//
// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
//
// Please see LICENSE.txt for additional copyright notices.
//
// == Contributing
//
// By contributing source code to SciRuby, you agree to be bound by
// our Contributor Agreement:
//
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
//
// == row.h
//
// Iterator for traversing a matrix row by row. Includes an
// orthogonal iterator for visiting each stored entry in a row.
// This one cannot be de-referenced; you have to de-reference
// the column.

#ifndef YALE_ITERATORS_ROW_H
# define YALE_ITERATORS_ROW_H

// NOTE(review): header names inside <...> were stripped by extraction here,
// as were the contents of every `template <...>` parameter list in this file.
#include
#include

namespace nm { namespace yale_storage {

template ::value, const nm::YaleStorage, nm::YaleStorage >::type>
class row_iterator_T {

protected:
  YaleRef& y;                // the Yale storage (or slice view) being iterated
  size_t i_;                 // current row, relative to the slice
  size_t p_first, p_last;    // first and last IJA positions in the row

  /*
   * Update the row positions -- use to ensure a row stays valid after an insert operation. Also
   * used to initialize a row iterator at a different row index.
   */
  void update() {
    if (i_ < y.shape(0)) {
      p_first = p_real_first();
      p_last = p_real_last();
      if (!nd_empty()) {
        // try to find new p_first
        p_first = y.real_find_left_boundary_pos(p_first, p_last, y.offset(1));
        if (!nd_empty()) {
          // also try to find new p_last
          p_last = y.real_find_left_boundary_pos(p_first, p_last, y.offset(1) + y.shape(1) - 1);
          if (y.ija(p_last) - y.offset(1) >= shape(1)) --p_last; // searched too far.
        }
      }
    } else { // invalid row -- this is an end iterator.
      p_first = y.ija(y.real_shape(0));
      p_last = y.ija(y.real_shape(0))-1; // mark as empty
    }
  }

  /*
   * Indicate to the row iterator that p_first and p_last have moved by some amount. Only
   * defined for row_iterator, not const_row_iterator. This is a lightweight form of update().
   */
  //template ::value>::type>
  void shift(int amount) {
    p_first += amount;
    p_last += amount;
  }

  /*
   * Enlarge the row by amount by moving p_last over. This is a lightweight form of update().
   */
  //template ::value>::type>
  void adjust_length(int amount) {
    p_last += amount;
  }

public:
/*  typedef row_stored_iterator_T row_stored_iterator;
  typedef row_stored_nd_iterator_T row_stored_nd_iterator;
  typedef row_stored_iterator_T const_row_stored_iterator;
  typedef row_stored_nd_iterator_T const_row_stored_nd_iterator;*/
  typedef row_stored_iterator_T > row_stored_iterator;
  typedef row_stored_nd_iterator_T > row_stored_nd_iterator;
  template friend class row_iterator_T;
  friend class row_stored_iterator_T >;
  friend class row_stored_nd_iterator_T >;//row_stored_iterator;
  friend class row_stored_iterator_T >;
  friend class row_stored_nd_iterator_T >;//row_stored_iterator;
  friend class nm::YaleStorage;
  //friend row_stored_nd_iterator;

  // Pass-through accessors to the underlying storage's IJA and A arrays.
  inline size_t ija(size_t pp) const { return y.ija(pp); }
  inline size_t& ija(size_t pp) { return y.ija(pp); }
  inline RefType& a(size_t p) const { return y.a_p()[p]; }
  inline RefType& a(size_t p) { return y.a_p()[p]; }

  row_iterator_T(YaleRef& obj, size_t ii = 0)
  : y(obj), i_(ii)
  {
    update();
  }

  // Row iterators compare by row index only.
  template ::value, const E, E>::type>
  bool operator!=(const row_iterator_T& rhs) const {
    return i_ != rhs.i_;
  }

  template ::value, const E, E>::type>
  bool operator==(const row_iterator_T& rhs) const {
    return i_ == rhs.i_;
  }

  template ::value, const E, E>::type>
  bool operator<(const row_iterator_T& rhs) const {
    return i_ < rhs.i_;
  }

  template ::value, const E, E>::type>
  bool operator>(const row_iterator_T& rhs) const {
    return i_ > rhs.i_;
  }

  row_iterator_T& operator++() {
    if (is_end()) throw std::out_of_range("attempted to iterate past end of slice (vertically)");
    ++i_;
    update();
    return *this;
  }

  row_iterator_T operator++(int dummy) const {
    row_iterator_T next(*this);
    return ++next;
  }

  bool is_end() const {
    return i_ == y.shape(0) && p_first == y.ija(y.real_shape(0));
  }

  // Row index in the underlying (un-sliced) matrix.
  size_t real_i() const {
    return i_ + y.offset(0);
  }

  size_t i() const {
    return i_;
  }

  // last element of the real row
  size_t p_real_last() const {
    return y.ija(real_i()+1)-1;
  }

  // first element of the real row
  size_t p_real_first() const {
    return y.ija(real_i());
  }

  // Is the real row of the original matrix totally empty of NDs?
  bool real_nd_empty() const {
    return p_real_last() < p_real_first();
  }

  // Is the sliced portion of the row empty of non-diagonal entries?
  bool nd_empty() const {
    return p_last < p_first;
  }

  // slice j coord of the diag.
  size_t diag_j() const {
    if (!has_diag())
      throw std::out_of_range("don't call diag_j unless you've checked for one");
    return real_i() - y.offset(1);
  }

  // return the actual position of the diagonal element for this real row, regardless of whether
  // it's in range or not.
  size_t p_diag() const {
    return real_i();
  }

  // Checks to see if there is a diagonal within the slice
  bool has_diag() const {
    // real position of diag is real_i == real_j. Is it in range?
    return (p_diag() >= y.offset(1) && p_diag() - y.offset(1) < y.shape(1));
  }

  // Checks to see if the diagonal is the first entry in the slice.
  bool is_diag_first() const {
    if (!has_diag()) return false;
    if (nd_empty()) return true;
    return diag_j() < y.ija(p_first) - y.offset(1);
  }

  // Checks to see if the diagonal is the last entry in the slice.
  // NOTE(review): unlike is_diag_first(), the comparison below does not
  // subtract y.offset(1) from y.ija(p_last); for a column-offset slice the
  // two predicates are therefore inconsistent -- TODO confirm whether
  // `y.ija(p_last) - y.offset(1)` was intended.
  bool is_diag_last() const {
    if (!has_diag()) return false;
    if (nd_empty()) return true;
    return diag_j() > y.ija(p_last);
  }

  // Is the row of the slice totally empty of NDs and Ds?
  // We can only determine that it's empty of Ds if the diagonal
  // is not a part of the sliced portion of the row.
  // NOTE(review): per the comment above, `!has_diag()` looks intended here;
  // as written this returns true only when the diagonal IS inside the slice,
  // which contradicts the stated contract -- TODO confirm against callers.
  bool empty() const {
    return nd_empty() && has_diag();
  }

  size_t shape(size_t pp) const {
    return y.shape(pp);
  }

  size_t offset(size_t pp) const {
    return y.offset(pp);
  }

  // Ruby Fixnum for the current (slice-relative) row index.
  inline VALUE rb_i() const { return LONG2NUM(i()); }

  row_stored_iterator_T begin() { return row_stored_iterator_T(*this, p_first); }
  row_stored_nd_iterator_T ndbegin() { return row_stored_nd_iterator_T(*this, p_first); }
  row_stored_iterator_T end() { return row_stored_iterator_T(*this, p_last+1, true); }
  row_stored_nd_iterator_T ndend() { return row_stored_nd_iterator_T(*this, p_last+1); }

  row_stored_iterator_T begin() const { return row_stored_iterator_T(*this, p_first); }
  row_stored_nd_iterator_T ndbegin() const { return row_stored_nd_iterator_T(*this, p_first); }
  row_stored_iterator_T end() const { return row_stored_iterator_T(*this, p_last+1, true); }
  row_stored_nd_iterator_T ndend() const { return row_stored_nd_iterator_T(*this, p_last+1); }

  // NOTE(review): two defects here: (1) there is no `return` statement, so a
  // value-returning function falls off its end (undefined behavior if the
  // result is used); (2) the argument j is never used -- ndfind() passes
  // `j + y.offset(1)` as the search target, and the same was almost certainly
  // intended here instead of the bare `y.offset(1)`. TODO: fix; left
  // untouched in this documentation-only pass because the surrounding
  // template declarations are extraction-damaged.
  row_stored_nd_iterator_T lower_bound(const size_t& j) const {
    row_stored_nd_iterator_T(*this, y.real_find_left_boundary_pos(p_first, p_last, y.offset(1)));
  }

  // Binary-search for the ND entry at slice column j (or the insertion point).
  row_stored_nd_iterator_T ndfind(size_t j) {
    if (j == 0) return ndbegin();
    size_t p = p_first > p_last ? p_first : y.real_find_left_boundary_pos(p_first, p_last, j + y.offset(1));
    row_stored_nd_iterator iter = row_stored_nd_iterator_T(*this, p);
    return iter;
  }

  row_stored_iterator_T find(size_t j) {
    if (j == 0) return begin(); // may or may not be on the diagonal
    else return row_stored_iterator_T(*this, ndfind(j).p(), false); // is on the diagonal, definitely
  }

  /*
   * Remove an entry from an already found non-diagonal position. Adjust this row appropriately so we can continue to
   * use it.
   */
  //template ::value>::type>
  row_stored_nd_iterator erase(row_stored_nd_iterator position) {
    size_t sz = y.size();
    // Shrink the backing vectors when utilization drops below 1/GROWTH_CONSTANT;
    // otherwise just close the gap in place.
    if (sz - 1 <= y.capacity() / nm::yale_storage::GROWTH_CONSTANT) {
      y.update_resize_move(position, real_i(), -1);
    } else {
      y.move_left(position, 1);
      y.update_real_row_sizes_from(real_i(), -1);
    }
    adjust_length(-1);
    return row_stored_nd_iterator(*this, position.p()-1);
  }

  /*
   * Remove an entry from the matrix at the already-located position. If diagonal, just sets to default; otherwise,
   * actually removes the entry.
   */
  //template ::value>::type>
  row_stored_nd_iterator erase(const row_stored_iterator& jt) {
    if (jt.diag()) {
      *jt = y.const_default_obj(); // diagonal is the easy case -- no movement.
      return row_stored_nd_iterator(*this, jt.p());
    } else {
      return erase(row_stored_nd_iterator(*this, jt.p()));
    }
  }

  // Insert (or replace) a single non-diagonal entry at slice column jj.
  // `position` is treated as a hint and advanced as needed.
  //template ::value>::type>
  row_stored_nd_iterator insert(row_stored_nd_iterator position, size_t jj, const D& val) {
    size_t sz = y.size();

    while (!position.end() && position.j() < jj) ++position; // position is just a hint. (This loop ideally only has to happen once.)

    if (!position.end() && position.j() == jj) {
      *position = val; // replace existing
    } else {

      if (sz + 1 > y.capacity()) {
        y.update_resize_move(position, real_i(), 1);
      } else {
        y.move_right(position, 1);
        y.update_real_row_sizes_from(real_i(), 1);
      }
      ija(position.p()) = jj + y.offset(1); // set column ID
      a(position.p()) = val;
      adjust_length(1);
    }

    return position++;
  }

  /*
   * This version of insert doesn't return anything. Why, when the others do?
   *
   * Well, mainly because j here can be a diagonal entry. Most of the inserters return the *next* element following
   * the insertion, but to do that, we have to create a row_stored_nd_iterator, which requires at least one binary
   * search for the location following the diagonal (and as of the writing of this, two binary searches). There's no
   * reason to do that when we never actually *use* the return value. So instead we just have void.
   */
  //template ::value>::type>
  void insert(size_t j, const D& val) {
    if (j + y.offset(1) == real_i()) a(real_i()) = val;
    else {
      row_stored_nd_iterator jt = ndfind(j);
      if (!jt.end() && jt.j() == j) {
        if (val == y.const_default_obj()) erase(jt); // erase
        else insert(jt, j, val); // replace
      } else { // only insert if it's not the default
        if (val != y.const_default_obj()) insert(jt, j, val);
      }
    }
  }

  /*
   * Determines a plan for inserting a single row. Returns an integer giving the amount of the row change.
   */
  int single_row_insertion_plan(row_stored_nd_iterator position, size_t jj, size_t length, D const* v, size_t v_size, size_t& v_offset) {
    int nd_change = 0;

    for (size_t jc = jj; jc < jj + length; ++jc, ++v_offset) {
      if (v_offset >= v_size) v_offset %= v_size; // reset v position.

      if (jc + y.offset(1) != real_i()) { // diagonal -- no nd_change here
        if (position.end()) {
          if (v[v_offset] != y.const_default_obj()) nd_change++; // insert
        } else if (position.j() != jc) { // not present -- do we need to add it?
          if (v[v_offset] != y.const_default_obj()) nd_change++;
        } else { // position.j() == jc
          if (v[v_offset] == y.const_default_obj()) nd_change--;
          ++position; // move iterator forward.
        }
      }
    }
    return nd_change;
  }

  /*
   * Determine a plan for inserting a single row -- finds the position first. Returns the position and
   * the change amount. Don't use this one if you can help it because it requires a binary search of
   * the row.
   */
  std::pair single_row_insertion_plan(size_t jj, size_t length, D const* v, size_t v_size, size_t& v_offset) {
    std::pair result;
    row_stored_nd_iterator pos = ndfind(jj);
    result.first = single_row_insertion_plan(pos, jj, length, v, v_size, v_offset);
    result.second = pos.p();
    return result;
  }

  /*
   * Insert elements into a single row. Returns an iterator to the end of the insertion range.
   */
  row_stored_nd_iterator insert(row_stored_nd_iterator position, size_t jj, size_t length, D const* v, size_t v_size, size_t& v_offset) {
    size_t tmp_v_offset = v_offset;
    int nd_change = single_row_insertion_plan(position, jj, length, v, v_size, tmp_v_offset);

    // First record the position, just in case our iterator becomes invalid.
    size_t pp = position.p();

    // Resize the array as necessary, or move entries after the insertion point to make room.
    size_t sz = y.size();
    if (sz + nd_change > y.capacity() || sz + nd_change <= y.capacity() / nm::yale_storage::GROWTH_CONSTANT)
      y.update_resize_move(position, real_i(), nd_change);
    else if (nd_change != 0) {
      if (nd_change < 0) y.move_left(position, -nd_change);
      else if (nd_change > 0) y.move_right(position, nd_change);
      y.update_real_row_sizes_from(real_i(), nd_change);
    }

    for (size_t jc = jj; jc < jj + length; ++jc, ++v_offset) {
      if (v_offset >= v_size) v_offset %= v_size; // reset v position.

      if (jc + y.offset(1) == real_i()) {
        y.a(real_i()) = v[v_offset]; // modify diagonal
      } else if (v[v_offset] != y.const_default_obj()) {
        // NOTE(review): this stores the slice-relative column `jc`, whereas the
        // single-element insert above stores `jj + y.offset(1)`; for a
        // column-offset slice these disagree -- TODO confirm whether
        // `jc + y.offset(1)` was intended.
        y.ija(pp) = jc; // modify non-diagonal
        y.a(pp) = v[v_offset];
        ++pp;
      }
    }

    // Update this row.
    adjust_length(nd_change);

    return row_stored_nd_iterator(*this, pp);
  }

  /*
   * For when we don't need to worry about the offset, does the same thing as the insert above.
   */
  row_stored_nd_iterator insert(const row_stored_nd_iterator& position, size_t jj, size_t length, D const* v, size_t v_size) {
    size_t v_offset = 0;
    return insert(position, jj, length, v, v_size, v_offset);
  }

  /*
   * Merges elements offered for insertion with existing elements in the row.
   */
  row_stored_nd_iterator insert(size_t jj, size_t length, D const* v, size_t v_size, size_t& v_offset) {
    return insert(ndfind(jj), jj, length, v, v_size, v_offset);
  }

  /*
   * Merges elements offered for insertion with existing elements in the row.
   */
  row_stored_nd_iterator insert(size_t jj, size_t length, D const* v, size_t v_size) {
    size_t v_offset = 0;
    return insert(ndfind(jj), jj, length, v, v_size, v_offset);
  }

};

} } // end of nm::yale_storage namespace

#endif // YALE_ITERATORS_ROW_H

================================================
FILE: ext/nmatrix/storage/yale/iterators/row_stored.h
================================================

/////////////////////////////////////////////////////////////////////
// = NMatrix
//
// A linear algebra library for scientific computation in Ruby.
// NMatrix is part of SciRuby.
//
// NMatrix was originally inspired by and derived from NArray, by
// Masahiro Tanaka: http://narray.rubyforge.org
//
// == Copyright Information
//
// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
//
// Please see LICENSE.txt for additional copyright notices.
//
// == Contributing
//
// By contributing source code to SciRuby, you agree to be bound by
// our Contributor Agreement:
//
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
//
// == row_stored.h
//
// Iterator for traversing a single stored row of a matrix (needed
// for row.h). FIXME: This is not as efficient as it could be; it uses
// two binary searches to find the beginning and end of each slice.
// The end search shouldn't be necessary, but I couldn't make it
// work without it, and eventually decided my dissertation should
// be a priority.
//

#ifndef YALE_ITERATORS_ROW_STORED_H
# define YALE_ITERATORS_ROW_STORED_H

#include
#include

namespace nm { namespace yale_storage {

/*
 * Iterator for visiting each stored element in a row, including diagonals.
 */
// NOTE(review): the template parameter list below lost its contents to
// extraction (as did every `template <...>` in this chunk); restore from
// upstream row_stored.h before compiling.
template ::value, const nm::YaleStorage, nm::YaleStorage >::type, typename RowRef = typename std::conditional< std::is_const::value, const row_iterator_T, row_iterator_T >::type>
class row_stored_iterator_T : public row_stored_nd_iterator_T {

protected:
  using row_stored_nd_iterator_T::r;
  using row_stored_nd_iterator_T::p_;
  // d_visited: have we already yielded the diagonal for this row?
  // d: is the iterator currently positioned ON the diagonal?
  bool d_visited, d;

public:

  // end_ is necessary for the logic when a row is empty other than the diagonal. If we just
  // relied on pp == last_p+1, it'd look like these empty rows were actually end() iterators.
  // So we have to actually mark end_ by telling it to ignore that diagonal visitation.
  row_stored_iterator_T(RowRef& row, size_t pp, bool end_ = false)
  : row_stored_nd_iterator_T(row, pp),
    d_visited(!row.has_diag()), // if the row has no diagonal, just marked it as visited.
    d(r.is_diag_first() && !end_) // do we start at the diagonal?
  {
  }

  /* Diagonal constructor. Puts us on the diagonal (unless end is true) */
  /*row_stored_iterator_T(RowRef& row, bool end_, size_t j)
  : row_stored_nd_iterator_T(row.ndfind(j)),
    d_visited(false),
    d(!end_ && j + row.offset(1) == row.real_i())
  {
  }*/

  virtual bool diag() const {
    return d;
  }

  // End only once the diagonal has been yielded (d is false) and p_ is past
  // the last stored ND position of the row.
  virtual bool end() const {
    return !d && p_ > r.p_last;
  }

  row_stored_iterator_T& operator++() {
    if (end()) throw std::out_of_range("cannot increment row stored iterator past end of stored row");
    if (d) {
      d_visited = true;
      d = false;
    } else {
      ++p_;
      // Are we at a diagonal?
      // If we hit the end or reach a point where j > diag_j, and still
      // haven't visited the diagonal, we should do so before continuing.
      if (!d_visited && (end() || j() > r.diag_j())) {
        d = true;
      }
    }

    return *this;
  }

  // NOTE(review): the local copy `r` shadows the inherited row reference
  // member `r`; harmless here (only the copy is touched) but confusing.
  row_stored_iterator_T operator++(int dummy) const {
    row_stored_iterator_T r(*this);
    return ++r;
  }

  // Slice-relative column of the current entry (diagonal or stored ND).
  size_t j() const {
    if (end()) throw std::out_of_range("cannot dereference an end pointer");
    return (d ? r.p_diag() : r.ija(p_)) - r.offset(1);
  }

  // Need to declare all row_stored_iterator_T friends of each other.
  template friend class row_stored_iterator_T;

  // De-reference the iterator
  RefType& operator*() {
    return d ? r.a(r.p_diag()) : r.a(p_);
  }

  RefType& operator*() const {
    return d ? r.a(r.p_diag()) : r.a(p_);
  }

  // Ruby VALUE de-reference
  VALUE operator~() const {
    return nm_rb_dereference(**this);
  }

};

}} // end of namespace nm::yale_storage

#endif // YALE_ITERATORS_ROW_STORED_H

================================================
FILE: ext/nmatrix/storage/yale/iterators/row_stored_nd.h
================================================

/////////////////////////////////////////////////////////////////////
// = NMatrix
//
// A linear algebra library for scientific computation in Ruby.
// NMatrix is part of SciRuby.
//
// NMatrix was originally inspired by and derived from NArray, by
// Masahiro Tanaka: http://narray.rubyforge.org
//
// == Copyright Information
//
// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
//
// Please see LICENSE.txt for additional copyright notices.
//
// == Contributing
//
// By contributing source code to SciRuby, you agree to be bound by
// our Contributor Agreement:
//
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
//
// == row_stored_nd.h
//
// Yale storage row-by-row nondiagonal-storage iterator
//

#ifndef YALE_ITERATORS_ROW_STORED_ND_H
# define YALE_ITERATORS_ROW_STORED_ND_H

#include
#include
#include
#include

namespace nm { namespace yale_storage {

/*
 * Constants
 */
// Growth factor used when the IJA/A backing vectors are resized.
const float GROWTH_CONSTANT = 1.5;


/*
 * Forward declarations
 */
template class row_iterator_T;

/*
 * Iterator for visiting each stored element in a row, including diagonals.
 */
// NOTE(review): template parameter lists throughout this chunk lost their
// contents to extraction; restore from upstream row_stored_nd.h.
template ::value, const nm::YaleStorage, nm::YaleStorage >::type, typename RowRef = typename std::conditional< std::is_const::value, const row_iterator_T, row_iterator_T >::type>
class row_stored_nd_iterator_T {

protected:
  RowRef& r;   // the row iterator this entry iterator walks within
  size_t p_;   // current IJA position

public:
  row_stored_nd_iterator_T(RowRef& row, size_t pp)
  : r(row),
    p_(pp) // do we start at the diagonal?
  {
  }

  // DO NOT IMPLEMENT THESE FUNCTIONS. They prevent C++ virtual slicing
  //template row_stored_nd_iterator_T(T const& rhs);
  //template row_stored_nd_iterator_T const& operator=(T const& rhs);

  // Next two functions are to ensure we can still cast between nd iterators.
  row_stored_nd_iterator_T(row_stored_nd_iterator_T const& rhs)
  : r(rhs.r), p_(rhs.p_)
  {
  }

  // Assignment is only permitted between iterators over the SAME row.
  row_stored_nd_iterator_T const& operator=(row_stored_nd_iterator_T const& rhs) {
    if (&r != &(rhs.r)) throw std::logic_error("can't assign iterator from another row iterator");
    p_ = rhs.p_;
    return *this;
  }

  virtual size_t p() const { return p_; }

  virtual bool end() const {
    return p_ > r.p_last;
  }

  row_stored_nd_iterator_T& operator++() {
    if (end()) throw std::out_of_range("cannot increment row stored iterator past end of stored row");
    ++p_;
    return *this;
  }

  row_stored_nd_iterator_T operator++(int dummy) const {
    row_stored_nd_iterator_T r(*this);
    return ++r;
  }

  // Slice-relative column of the current stored ND entry.
  virtual size_t j() const {
    if (end()) throw std::out_of_range("cannot dereference (get j()) for an end pointer");
    return r.ija(p_) - r.offset(1);
  }

  // Need to declare all row_stored_nd_iterator_T friends of each other.
  template friend class row_stored_nd_iterator_T;

  virtual bool operator==(const row_stored_nd_iterator_T& rhs) const {
    if (r.i() != rhs.r.i()) return false;
    if (end()) return rhs.end();
    else if (rhs.end()) return false;
    return j() == rhs.j();
  }

  // There is something wrong with this function.
  // NOTE(review): as written it is the exact logical negation of operator==
  // above; if a defect exists it presumably involves virtual dispatch of
  // end()/j() when called through the derived row_stored_iterator_T (whose
  // end() also depends on diagonal state) -- TODO investigate before relying
  // on != across iterator kinds.
  virtual bool operator!=(const row_stored_nd_iterator_T& rhs) const {
    if (r.i() != rhs.r.i()) return true;
    if (end()) return !rhs.end();
    else if (rhs.end()) return true;
    return j() != rhs.j();
  }

  // Order by row first, then by column; end() sorts after all stored entries.
  template ::value, const E, E>::type>
  bool operator<(const row_stored_nd_iterator_T& rhs) const {
    if (r < rhs.r) return true;
    if (r > rhs.r) return false;

    // r == rhs.r
    if (end()) return false;
    if (rhs.end()) return true;
    return j() < rhs.j();
  }

  // De-reference the iterator
  RefType& operator*() {
    return r.a(p_);
  }

  RefType& operator*() const {
    return r.a(p_);
  }

  // Ruby VALUE de-reference
  VALUE operator~() const {
    return nm_rb_dereference(**this);
  }

  inline virtual VALUE rb_j() const { return LONG2NUM(j()); }

};

} } // end of namespace nm::yale_storage

#endif // YALE_ITERATORS_ROW_STORED_ND_H

================================================
FILE: ext/nmatrix/storage/yale/iterators/stored_diagonal.h
================================================

/////////////////////////////////////////////////////////////////////
// = NMatrix
//
// A linear algebra library for scientific computation in Ruby.
// NMatrix is part of SciRuby.
//
// NMatrix was originally inspired by and derived from NArray, by
// Masahiro Tanaka: http://narray.rubyforge.org
//
// == Copyright Information
//
// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
//
// Please see LICENSE.txt for additional copyright notices.
//
// == Contributing
//
// By contributing source code to SciRuby, you agree to be bound by
// our Contributor Agreement:
//
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
//
// == stored_diagonal_iterator.h
//
// Yale storage diagonal-storage iterator
//

#ifndef YALE_ITERATORS_STORED_DIAGONAL_H
# define YALE_ITERATORS_STORED_DIAGONAL_H

#include
#include
#include

namespace nm { namespace yale_storage {

/*
 * Iterate across the stored diagonal.
 */
// NOTE(review): template parameter list contents stripped by extraction;
// restore from upstream stored_diagonal.h.
template ::value, const nm::YaleStorage, nm::YaleStorage >::type>
class stored_diagonal_iterator_T : public basic_iterator_T {
  using basic_iterator_T::p_;
  using basic_iterator_T::y;
  using basic_iterator_T::offset;
  using basic_iterator_T::shape;

public:
  // d is the index along the stored diagonal, relative to the slice's
  // first in-range diagonal element.
  stored_diagonal_iterator_T(YaleRef& obj, size_t d = 0)
  : basic_iterator_T(obj, // y
                     std::max(obj.offset(0), obj.offset(1)) + d - obj.offset(0), // i_
                     std::max(obj.offset(0), obj.offset(1)) + d) // p_
  {
    // std::cerr << "sdbegin: d=" << d << ", p_=" << p_ << ", i()=" << i() << ", j()=" << j() << std::endl;
    // p_ can range from max(y.offset(0), y.offset(1)) to min(y.real_shape(0), y.real_shape(1))
  }

  size_t d() const {
    return p_ - std::max(offset(0), offset(1));
  }

  // NOTE(review): the guard here uses i() < shape(0) while end() below uses
  // min(shape(0)+offset(0), shape(1)+offset(1)); for wide slices these
  // bounds differ -- presumably intentional (stops at the row bound), but
  // verify against callers.
  stored_diagonal_iterator_T& operator++() {
    if (i() < shape(0)) ++p_;
    return *this;
  }

  stored_diagonal_iterator_T operator++(int dummy) const {
    stored_diagonal_iterator_T iter(*this);
    return ++iter;
  }

  // Indicates if we're at the end of the iteration.
  bool end() const {
    return p_ >= std::min( shape(0) + offset(0), shape(1) + offset(1) );
  }

  // i() and j() are how we know if we're past-the-end. i will be shape(0) and j will be 0.
  size_t i() const {
    return p_ - offset(0);
  }
  size_t j() const {
    return p_ - offset(1);
  }

  // All comparisons are by diagonal index d().
  template ::value, const E, E>::type>
  bool operator!=(const stored_diagonal_iterator_T& rhs) const { return d() != rhs.d(); }

  template ::value, const E, E>::type>
  bool operator==(const stored_diagonal_iterator_T& rhs) const { return !(*this != rhs); }

  template ::value, const E, E>::type>
  bool operator<(const stored_diagonal_iterator_T& rhs) const { return d() < rhs.d(); }

  template ::value, const E, E>::type>
  bool operator<=(const stored_diagonal_iterator_T& rhs) const { return d() <= rhs.d(); }

  template ::value, const E, E>::type>
  bool operator>(const stored_diagonal_iterator_T& rhs) const { return d() > rhs.d(); }

  template ::value, const E, E>::type>
  bool operator>=(const stored_diagonal_iterator_T& rhs) const { return d() >= rhs.d(); }

  // De-reference: the diagonal element is stored at A[p_] directly.
  RefType& operator*() {
    return y.a(p_);
  }

  RefType& operator*() const {
    return y.a(p_);
  }

};

} } // end of namespace nm::yale_storage

#endif // YALE_ITERATORS_STORED_DIAGONAL_H

================================================
FILE: ext/nmatrix/storage/yale/math/transpose.h
================================================

/////////////////////////////////////////////////////////////////////
// = NMatrix
//
// A linear algebra library for scientific computation in Ruby.
// NMatrix is part of SciRuby.
//
// NMatrix was originally inspired by and derived from NArray, by
// Masahiro Tanaka: http://narray.rubyforge.org
//
// == Copyright Information
//
// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
//
// Please see LICENSE.txt for additional copyright notices.
//
// == Contributing
//
// By contributing source code to SciRuby, you agree to be bound by
// our Contributor Agreement:
//
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
//
// == transpose.h
//
// Functions for Yale math: transposing
//

#ifndef YALE_MATH_TRANSPOSE_H
# define YALE_MATH_TRANSPOSE_H

namespace nm { namespace yale_storage {

/*
 * Transposes a generic Yale matrix (old or new). Specify new by setting RDiag = true.
 *
 * Based on transp from SMMP (same as symbmm and numbmm).
 *
 * This is not named in the same way as most yale_storage functions because it does not act on a YALE_STORAGE
 * object.
 *
 * Template parameters (restored -- the extraction that produced this chunk had
 * stripped the parameter list, leaving AD/BD/DiagA/Move undeclared):
 *   AD    -- element type of the input A array
 *   BD    -- element type of the output B array
 *   DiagA -- true when the input is "new" Yale (diagonal stored separately in
 *            a[0..min(n,m)), index arrays starting at position m+1)
 *   Move  -- true to copy values into b; false to compute structure (ib/jb) only
 *
 * Arguments:
 *   n, m        -- rows and columns of the input matrix
 *   ia, ja, a   -- input row pointers, column indices, and values
 *   a_default   -- the input's default ("zero") value; defaults are not copied
 *   ib, jb, b   -- output arrays for the transpose (m rows); note that when
 *                  Move is true, the clear loop writes b[0..m] inclusive, so b
 *                  must have room for at least m+1 entries
 *   b_default   -- default value used to clear b
 */
template <typename AD, typename BD, bool DiagA, bool Move>
void transpose_yale(const size_t n, const size_t m, const size_t* ia, const size_t* ja,
                    const AD* a, const AD& a_default,
                    size_t* ib, size_t* jb, BD* b, const BD& b_default) {
  size_t index;

  // Clear B
  for (size_t i = 0; i < m+1; ++i) ib[i] = 0;
  if (Move)
    for (size_t i = 0; i < m+1; ++i) b[i] = b_default;

  if (DiagA) ib[0] = m + 1;   // new Yale: structure begins after the diagonal block
  else       ib[0] = 0;

  /* count indices for each column */
  for (size_t i = 0; i < n; ++i) {
    for (size_t j = ia[i]; j < ia[i+1]; ++j) {
      ++(ib[ja[j]+1]);
    }
  }

  // prefix-sum the counts into provisional row pointers for the transpose
  for (size_t i = 0; i < m; ++i) {
    ib[i+1] = ib[i] + ib[i+1];
  }

  /* now make jb */
  for (size_t i = 0; i < n; ++i) {
    for (size_t j = ia[i]; j < ia[i+1]; ++j) {
      index = ja[j];
      jb[ib[index]] = i;

      if (Move && a[j] != a_default)
        b[ib[index]] = a[j];

      ++(ib[index]);   // ib[index] now points one past the slot just filled
    }
  }

  /* now fixup ib -- shift the (advanced) row pointers back down by one row */
  for (size_t i = m; i >= 1; --i) {
    ib[i] = ib[i-1];
  }

  if (DiagA) {
    if (Move) {
      size_t j = std::min(n,m);

      // copy the separately-stored diagonal straight across
      for (size_t i = 0; i < j; ++i) {
        b[i] = a[i];
      }
    }
    ib[0] = m + 1;
  } else {
    ib[0] = 0;
  }
}

} } // end of namespace nm::yale_storage

#endif

// ================================================
// FILE: ext/nmatrix/storage/yale/yale.cpp
// ================================================

/////////////////////////////////////////////////////////////////////
// = NMatrix
//
// A linear algebra library for scientific computation in Ruby.
// NMatrix is part of SciRuby.
//
// NMatrix was originally inspired by and derived from NArray, by
// Masahiro Tanaka: http://narray.rubyforge.org
//
// == Copyright Information
//
// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
//
// Please see LICENSE.txt for additional copyright notices.
//
// == Contributing
//
// By contributing source code to SciRuby, you agree to be bound by
// our Contributor Agreement:
//
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
//
// == yale.c
//
// "new yale" storage format for 2D matrices (like yale, but with
// the diagonal pulled out for O(1) access).
//
// Specifications:
// * dtype and index dtype must necessarily differ
// * index dtype is defined by whatever unsigned type can store
// max(rows,cols)
// * that means vector ija stores only index dtype, but a stores
// dtype
// * vectors must be able to grow as necessary
// * maximum size is rows*cols+1

/*
 * Standard Includes
 */

// NOTE(review): extraction stripped everything between angle brackets in this
// chunk, so these standard includes lost their header names (the quoted
// project includes below survived). The same stripping removed the contents
// of every `template <...>` and `reinterpret_cast<...>` in this file; restore
// from upstream yale.cpp before compiling.
#include
#include // std::min
#include // std::fprintf
#include
#include
#include
#include
#include

/*
 * Project Includes
 */
// #include "types.h"
#include "../../data/data.h"
#include "../../math/math.h"
#include "../common.h"
#include "../../nmatrix.h"
#include "../../data/meta.h"

#include "iterators/base.h"
#include "iterators/stored_diagonal.h"
#include "iterators/row_stored_nd.h"
#include "iterators/row_stored.h"
#include "iterators/row.h"
#include "iterators/iterator.h"
#include "class.h"
#include "yale.h"

#include "../../ruby_constants.h"

/*
 * Macros
 */
#ifndef NM_MAX
#define NM_MAX(a,b) (((a)>(b))?(a):(b))
#define NM_MIN(a,b) (((a)<(b))?(a):(b))
#endif

/*
 * Forward Declarations
 */

extern "C" {
  static YALE_STORAGE* alloc(nm::dtype_t dtype, size_t* shape, size_t dim);

  static size_t yale_count_slice_copy_ndnz(const YALE_STORAGE* s, size_t*, size_t*);

  static void* default_value_ptr(const YALE_STORAGE* s);
  static VALUE default_value(const YALE_STORAGE* s);
  static VALUE obj_at(YALE_STORAGE* s, size_t k);

  /* Ruby-accessible functions */
  static VALUE nm_size(VALUE self);
  static VALUE nm_a(int argc, VALUE* argv, VALUE self);
  static VALUE nm_d(int argc, VALUE* argv, VALUE self);
  static VALUE nm_lu(VALUE self);
  static VALUE nm_ia(VALUE self);
  static VALUE nm_ja(VALUE self);
  static VALUE nm_ija(int argc, VALUE* argv, VALUE self);

  static VALUE nm_row_keys_intersection(VALUE m1, VALUE ii1, VALUE m2, VALUE ii2);

  static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self);

  // Non-diagonal non-zero count of the source (unsliced) storage.
  static inline size_t src_ndnz(const YALE_STORAGE* s) {
    return reinterpret_cast(s->src)->ndnz;
  }

} // end extern "C" block

namespace nm { namespace yale_storage {

template static VALUE map_merged_stored(VALUE left, VALUE right, VALUE init);

template static bool ndrow_is_empty(const YALE_STORAGE* s, IType ija, const IType ija_next);

template static bool ndrow_eqeq_ndrow(const YALE_STORAGE* l, const YALE_STORAGE* r, IType l_ija, const IType l_ija_next, IType r_ija, const IType r_ija_next);

template static bool eqeq(const YALE_STORAGE* left, const YALE_STORAGE* right);

template static bool eqeq_different_defaults(const YALE_STORAGE* s, const LDType& s_init, const YALE_STORAGE* t, const RDType& t_init);

static void increment_ia_after(YALE_STORAGE* s, IType ija_size, IType i, long n);

static IType insert_search(YALE_STORAGE* s, IType left, IType right, IType key, bool& found);

template static char vector_insert(YALE_STORAGE* s, size_t pos, size_t* j, void* val_, size_t n, bool struct_only);

template static char vector_insert_resize(YALE_STORAGE* s, size_t current_size, size_t pos, size_t* j, size_t n, bool struct_only);

template static std::tuple > > count_slice_set_ndnz_change(YALE_STORAGE* s, size_t* coords, size_t* lengths, DType* v, size_t v_size);

// Accessors for the source storage's IJA (index) and A (value) arrays.
static inline IType* IJA(const YALE_STORAGE* s) {
  return reinterpret_cast(s->src)->ija;
}

static inline IType IJA_SET(const YALE_STORAGE* s, size_t loc, IType val) {
  return IJA(s)[loc] = val;
}

template static inline DType* A(const YALE_STORAGE* s) {
  return reinterpret_cast(reinterpret_cast(s->src)->a);
}

template static inline DType A_SET(const YALE_STORAGE* s, size_t loc, DType val) {
  return A(s)[loc] = val;
}

/*
 * Functions
 */

/*
 * Copy a vector from one DType to another.
 */
template static inline void copy_recast_vector(const void* in_, void* out_, size_t length) {
  const RType* in = reinterpret_cast(in_);
  LType* out = reinterpret_cast(out_);
  for (size_t i = 0; i < length; ++i) {
    out[i] = in[i];
  }
  // NOTE(review): the statement below is a stray no-op expression (the value
  // of `out` is computed and discarded); harmless, but it should be deleted.
  out;
}

/*
 * Create Yale storage from IA, JA, and A vectors given in Old Yale format (probably from a file, since NMatrix only uses
 * new Yale for its storage).
 *
 * This function is needed for Matlab .MAT v5 IO.
 */
// NOTE(review): this function continues past the end of this chunk; only its
// opening portion is visible (and is kept verbatim below).
template YALE_STORAGE* create_from_old_yale(dtype_t dtype, size_t* shape, char* r_ia, char* r_ja, char* r_a) {
  IType* ir = reinterpret_cast(r_ia);
  IType* jr = reinterpret_cast(r_ja);
  RDType* ar = reinterpret_cast(r_a);

  // Read through ia and ja and figure out the ndnz (non-diagonal non-zeros) count.
  size_t ndnz = 0, i, p, p_next;

  for (i = 0; i < shape[0]; ++i) { // Walk down rows
    for (p = ir[i], p_next = ir[i+1]; p < p_next; ++p) { // Now walk through columns
      if (i != jr[p]) ++ndnz; // entry is non-diagonal and probably nonzero
    }
  }

  // Having walked through the matrix, we now go about allocating the space for it.
  YALE_STORAGE* s = alloc(dtype, shape, 2);

  s->capacity = shape[0] + ndnz + 1;
  s->ndnz = ndnz;

  // Setup IJA and A arrays
  s->ija = NM_ALLOC_N( IType, s->capacity );
  s->a = NM_ALLOC_N( LDType, s->capacity );
  IType* ijl = reinterpret_cast(s->ija);
  LDType* al = reinterpret_cast(s->a);

  // set the diagonal to zero -- this prevents uninitialized values from popping up.
for (size_t index = 0; index < shape[0]; ++index) { al[index] = 0; } // Figure out where to start writing JA in IJA: size_t pp = s->shape[0]+1; // Find beginning of first row p = ir[0]; // Now fill the arrays for (i = 0; i < s->shape[0]; ++i) { // Set the beginning of the row (of output) ijl[i] = pp; // Now walk through columns, starting at end of row (of input) for (size_t p_next = ir[i+1]; p < p_next; ++p, ++pp) { if (i == jr[p]) { // diagonal al[i] = ar[p]; --pp; } else { // nondiagonal ijl[pp] = jr[p]; al[pp] = ar[p]; } } } ijl[i] = pp; // Set the end of the last row // Set the zero position for our output matrix al[i] = 0; return s; } /* * Empty the matrix by initializing the IJA vector and setting the diagonal to 0. * * Called when most YALE_STORAGE objects are created. * * Can't go inside of class YaleStorage because YaleStorage creation requires that * IJA already be initialized. */ template void init(YALE_STORAGE* s, void* init_val) { IType IA_INIT = s->shape[0] + 1; IType* ija = reinterpret_cast(s->ija); // clear out IJA vector for (IType i = 0; i < IA_INIT; ++i) { ija[i] = IA_INIT; // set initial values for IJA } clear_diagonal_and_zero(s, init_val); } template static YALE_STORAGE* slice_copy(YALE_STORAGE* s) { YaleStorage y(s); return y.template alloc_copy(); } /* * Template version of copy transposed. This could also, in theory, allow a map -- but transpose.h * would need to be updated. * * TODO: Update for slicing? Update for different dtype in and out? We can cast rather easily without * too much modification. */ template YALE_STORAGE* copy_transposed(YALE_STORAGE* rhs) { YaleStorage y(rhs); return y.template alloc_copy_transposed(); } /////////////// // Accessors // /////////////// /* * Determine the number of non-diagonal non-zeros in a not-yet-created copy of a slice or matrix. 
*/ template static size_t count_slice_copy_ndnz(const YALE_STORAGE* s, size_t* offset, size_t* shape) { IType* ija = s->ija; DType* a = reinterpret_cast(s->a); DType ZERO(*reinterpret_cast(default_value_ptr(s))); // Calc ndnz for the destination size_t ndnz = 0; size_t i, j; // indexes of destination matrix size_t k, l; // indexes of source matrix for (i = 0; i < shape[0]; i++) { k = i + offset[0]; for (j = 0; j < shape[1]; j++) { l = j + offset[1]; if (j == i) continue; if (k == l) { // for diagonal element of source if (a[k] != ZERO) ++ndnz; } else { // for non-diagonal element for (size_t c = ija[k]; c < ija[k+1]; c++) { if (ija[c] == l) { ++ndnz; break; } } } } } return ndnz; } /* * Get a single element of a yale storage object */ template static void* get_single(YALE_STORAGE* storage, SLICE* slice) { YaleStorage y(storage); return reinterpret_cast(y.get_single_p(slice)); } /* * Returns a reference-slice of a matrix. */ template YALE_STORAGE* ref(YALE_STORAGE* s, SLICE* slice) { return YaleStorage(s).alloc_ref(slice); } /* * Attempt to set a cell or cells in a Yale matrix. */ template void set(VALUE left, SLICE* slice, VALUE right) { YALE_STORAGE* storage = NM_STORAGE_YALE(left); YaleStorage y(storage); y.insert(slice, right); } /////////// // Tests // /////////// /* * Yale eql? -- for whole-matrix comparison returning a single value. */ template static bool eqeq(const YALE_STORAGE* left, const YALE_STORAGE* right) { return YaleStorage(left) == YaleStorage(right); } ////////// // Math // ////////// #define YALE_IA(s) (reinterpret_cast(s->ija)) #define YALE_IJ(s) (reinterpret_cast(s->ija) + s->shape[0] + 1) #define YALE_COUNT(yale) (yale->ndnz + yale->shape[0]) ///////////// // Utility // ///////////// /* * Binary search for finding the beginning of a slice. Returns the position of the first element which is larger than * bound. 
*/ IType binary_search_left_boundary(const YALE_STORAGE* s, IType left, IType right, IType bound) { if (left > right) return -1; IType* ija = IJA(s); if (ija[left] >= bound) return left; // shortcut IType mid = (left + right) / 2; IType mid_j = ija[mid]; if (mid_j == bound) return mid; else if (mid_j > bound) { // eligible! don't exclude it. return binary_search_left_boundary(s, left, mid, bound); } else // (mid_j < bound) return binary_search_left_boundary(s, mid + 1, right, bound); } /* * Binary search for returning stored values. Returns a non-negative position, or -1 for not found. */ int binary_search(YALE_STORAGE* s, IType left, IType right, IType key) { if (s->src != s) throw; // need to fix this quickly if (left > right) return -1; IType* ija = s->ija; IType mid = (left + right)/2; IType mid_j = ija[mid]; if (mid_j == key) return mid; else if (mid_j > key) return binary_search(s, left, mid - 1, key); else return binary_search(s, mid + 1, right, key); } /* * Resize yale storage vectors A and IJA, copying values. */ static void vector_grow(YALE_STORAGE* s) { if (s != s->src) { throw; // need to correct this quickly. 
} nm_yale_storage_register(s); size_t new_capacity = s->capacity * GROWTH_CONSTANT; size_t max_capacity = YaleStorage::max_size(s->shape); if (new_capacity > max_capacity) new_capacity = max_capacity; IType* new_ija = NM_ALLOC_N(IType, new_capacity); void* new_a = NM_ALLOC_N(char, DTYPE_SIZES[s->dtype] * new_capacity); IType* old_ija = s->ija; void* old_a = s->a; memcpy(new_ija, old_ija, s->capacity * sizeof(IType)); memcpy(new_a, old_a, s->capacity * DTYPE_SIZES[s->dtype]); s->capacity = new_capacity; if (s->dtype == nm::RUBYOBJ) nm_yale_storage_register_a(new_a, s->capacity * DTYPE_SIZES[s->dtype]); NM_FREE(old_ija); nm_yale_storage_unregister(s); NM_FREE(old_a); if (s->dtype == nm::RUBYOBJ) nm_yale_storage_unregister_a(new_a, s->capacity * DTYPE_SIZES[s->dtype]); s->ija = new_ija; s->a = new_a; } /* * Resize yale storage vectors A and IJA in preparation for an insertion. */ template static char vector_insert_resize(YALE_STORAGE* s, size_t current_size, size_t pos, size_t* j, size_t n, bool struct_only) { if (s != s->src) throw; // Determine the new capacity for the IJA and A vectors. size_t new_capacity = s->capacity * GROWTH_CONSTANT; size_t max_capacity = YaleStorage::max_size(s->shape); if (new_capacity > max_capacity) { new_capacity = max_capacity; if (current_size + n > max_capacity) rb_raise(rb_eNoMemError, "insertion size exceeded maximum yale matrix size"); } if (new_capacity < current_size + n) new_capacity = current_size + n; nm_yale_storage_register(s); // Allocate the new vectors. 
IType* new_ija = NM_ALLOC_N( IType, new_capacity ); NM_CHECK_ALLOC(new_ija); DType* new_a = NM_ALLOC_N( DType, new_capacity ); NM_CHECK_ALLOC(new_a); IType* old_ija = reinterpret_cast(s->ija); DType* old_a = reinterpret_cast(s->a); // Copy all values prior to the insertion site to the new IJA and new A if (struct_only) { for (size_t i = 0; i < pos; ++i) { new_ija[i] = old_ija[i]; } } else { for (size_t i = 0; i < pos; ++i) { new_ija[i] = old_ija[i]; new_a[i] = old_a[i]; } } // Copy all values subsequent to the insertion site to the new IJA and new A, leaving room (size n) for insertion. if (struct_only) { for (size_t i = pos; i < current_size; ++i) { new_ija[i+n] = old_ija[i]; } } else { for (size_t i = pos; i < current_size; ++i) { new_ija[i+n] = old_ija[i]; new_a[i+n] = old_a[i]; } } s->capacity = new_capacity; if (s->dtype == nm::RUBYOBJ) nm_yale_storage_register_a(new_a, new_capacity); NM_FREE(s->ija); nm_yale_storage_unregister(s); NM_FREE(s->a); if (s->dtype == nm::RUBYOBJ) nm_yale_storage_unregister_a(new_a, new_capacity); s->ija = new_ija; s->a = reinterpret_cast(new_a); return 'i'; } /* * Insert a value or contiguous values in the ija and a vectors (after ja and * diag). Does not free anything; you are responsible! * * TODO: Improve this so it can handle non-contiguous element insertions * efficiently. For now, we can just sort the elements in the row in * question.) */ template static char vector_insert(YALE_STORAGE* s, size_t pos, size_t* j, void* val_, size_t n, bool struct_only) { if (pos < s->shape[0]) { rb_raise(rb_eArgError, "vector insert pos (%lu) is before beginning of ja (%lu); this should not happen", pos, s->shape[0]); } DType* val = reinterpret_cast(val_); size_t size = s->ija[s->shape[0]]; IType* ija = s->ija; DType* a = reinterpret_cast(s->a); if (size + n > s->capacity) { vector_insert_resize(s, size, pos, j, n, struct_only); // Need to get the new locations for ija and a. 
ija = s->ija; a = reinterpret_cast(s->a); } else { /* * No resize required: * easy (but somewhat slow), just copy elements to the tail, starting at * the end, one element at a time. * * TODO: This can be made slightly more efficient, but only after the tests * are written. */ if (struct_only) { for (size_t i = 0; i < size - pos; ++i) { ija[size+n-1-i] = ija[size-1-i]; } } else { for (size_t i = 0; i < size - pos; ++i) { ija[size+n-1-i] = ija[size-1-i]; a[size+n-1-i] = a[size-1-i]; } } } // Now insert the new values. if (struct_only) { for (size_t i = 0; i < n; ++i) { ija[pos+i] = j[i]; } } else { for (size_t i = 0; i < n; ++i) { ija[pos+i] = j[i]; a[pos+i] = val[i]; } } return 'i'; } /* * If we add n items to row i, we need to increment ija[i+1] and onward. */ static void increment_ia_after(YALE_STORAGE* s, IType ija_size, IType i, long n) { IType* ija = s->ija; ++i; for (; i <= ija_size; ++i) { ija[i] += n; } } /* * Binary search for returning insertion points. */ static IType insert_search(YALE_STORAGE* s, IType left, IType right, IType key, bool& found) { if (left > right) { found = false; return left; } IType* ija = s->ija; IType mid = (left + right)/2; IType mid_j = ija[mid]; if (mid_j == key) { found = true; return mid; } else if (mid_j > key) { return insert_search(s, left, mid-1, key, found); } else { return insert_search(s, mid+1, right, key, found); } } ///////////////////////// // Copying and Casting // ///////////////////////// /* * Templated copy constructor for changing dtypes. */ template YALE_STORAGE* cast_copy(const YALE_STORAGE* rhs) { YaleStorage y(rhs); return y.template alloc_copy(); } /* * Template access for getting the size of Yale storage. 
*/ size_t get_size(const YALE_STORAGE* storage) { return storage->ija[ storage->shape[0] ]; } template static STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector) { YALE_STORAGE *left = (YALE_STORAGE*)(casted_storage.left), *right = (YALE_STORAGE*)(casted_storage.right); nm_yale_storage_register(left); nm_yale_storage_register(right); // We can safely get dtype from the casted matrices; post-condition of binary_storage_cast_alloc is that dtype is the // same for left and right. // int8_t dtype = left->dtype; IType* ijl = left->ija; IType* ijr = right->ija; // First, count the ndnz of the result. // TODO: This basically requires running symbmm twice to get the exact ndnz size. That's frustrating. Are there simple // cases where we can avoid running it? size_t result_ndnz = nm::math::symbmm(resulting_shape[0], left->shape[1], resulting_shape[1], ijl, ijl, true, ijr, ijr, true, NULL, true); // Create result storage. YALE_STORAGE* result = nm_yale_storage_create(left->dtype, resulting_shape, 2, result_ndnz); init(result, NULL); IType* ija = result->ija; // Symbolic multiplication step (build the structure) nm::math::symbmm(resulting_shape[0], left->shape[1], resulting_shape[1], ijl, ijl, true, ijr, ijr, true, ija, true); // Numeric multiplication step (fill in the elements) nm::math::numbmm(result->shape[0], left->shape[1], result->shape[1], ijl, ijl, reinterpret_cast(left->a), true, ijr, ijr, reinterpret_cast(right->a), true, ija, ija, reinterpret_cast(result->a), true); // Sort the columns nm::math::smmp_sort_columns(result->shape[0], ija, ija, reinterpret_cast(result->a)); nm_yale_storage_unregister(right); nm_yale_storage_unregister(left); return reinterpret_cast(result); } /* * Get the sum of offsets from the original matrix (for sliced iteration). 
*/ static std::array get_offsets(YALE_STORAGE* x) { std::array offsets{ {0,0} }; while (x != x->src) { offsets[0] += x->offset[0]; offsets[1] += x->offset[1]; x = reinterpret_cast(x->src); } return offsets; } class RowIterator { protected: YALE_STORAGE* s; IType* ija; void* a; IType i, k, k_end; size_t j_offset, j_shape; bool diag, End; VALUE init; public: RowIterator(YALE_STORAGE* s_, IType* ija_, IType i_, size_t j_shape_, size_t j_offset_ = 0) : s(s_), ija(ija_), a(reinterpret_cast(s->src)->a), i(i_), k(ija[i]), k_end(ija[i+1]), j_offset(j_offset_), j_shape(j_shape_), diag(row_has_no_nd() || diag_is_first()), End(false), init(default_value(s)) { } RowIterator(YALE_STORAGE* s_, IType i_, size_t j_shape_, size_t j_offset_ = 0) : s(s_), ija(IJA(s)), a(reinterpret_cast(s->src)->a), i(i_), k(ija[i]), k_end(ija[i+1]), j_offset(j_offset_), j_shape(j_shape_), diag(row_has_no_nd() || diag_is_first()), End(false), init(default_value(s)) { } RowIterator(const RowIterator& rhs) : s(rhs.s), ija(rhs.ija), a(reinterpret_cast(s->src)->a), i(rhs.i), k(rhs.k), k_end(rhs.k_end), j_offset(rhs.j_offset), j_shape(rhs.j_shape), diag(rhs.diag), End(rhs.End), init(rhs.init) { } VALUE obj() const { return diag ? obj_at(s, i) : obj_at(s, k); } template T cobj() const { if (typeid(T) == typeid(RubyObject)) return obj(); return A(s)[diag ? i : k]; } inline IType proper_j() const { return diag ? 
i : ija[k]; } inline IType offset_j() const { return proper_j() - j_offset; } inline size_t capacity() const { return reinterpret_cast(s->src)->capacity; } inline void vector_grow() { YALE_STORAGE* src = reinterpret_cast(s->src); nm::yale_storage::vector_grow(src); ija = reinterpret_cast(src->ija); a = src->a; } /* Returns true if an additional value is inserted, false if it goes on the diagonal */ bool insert(IType j, VALUE v) { if (j == i) { // insert regardless on diagonal reinterpret_cast(a)[j] = v; return false; } else { if (rb_funcall(v, rb_intern("!="), 1, init) == Qtrue) { if (k >= capacity()) { vector_grow(); } reinterpret_cast(a)[k] = v; ija[k] = j; k++; return true; } return false; } } void update_row_end() { ija[i+1] = k; k_end = k; } /* Past the j_shape? */ inline bool end() const { if (End) return true; //if (diag) return i - j_offset >= j_shape; //else return k >= s->capacity || ija[k] - j_offset >= j_shape; return (int)(diag ? i : ija[k]) - (int)(j_offset) >= (int)(j_shape); } inline bool row_has_no_nd() const { return ija[i] == k_end; /* k_start == k_end */ } inline bool diag_is_first() const { return i < ija[ija[i]]; } inline bool diag_is_last() const { return i > ija[k_end-1]; } // only works if !row_has_no_nd() inline bool k_is_last_nd() const { return k == k_end-1; } inline bool k_is_last() const { return k_is_last_nd() && !diag_is_last(); } inline bool diag_is_ahead() const { return i > ija[k]; } inline bool row_has_diag() const { return i < s->shape[1]; } inline bool diag_is_next() const { // assumes we've already tested for diag, row_has_no_nd(), diag_is_first() if (i == ija[k]+1) return true; // definite next else if (k+1 < k_end && i >= ija[k+1]+1) return false; // at least one item before it else return true; } RowIterator& operator++() { if (diag) { // we're at the diagonal if (row_has_no_nd() || diag_is_last()) End = true; // and there are no non-diagonals (or none still to visit) diag = false; } else if (!row_has_diag()) { // row has 
no diagonal entries if (row_has_no_nd() || k_is_last_nd()) End = true; // row is totally empty, or we're at last entry else k++; // still entries to visit } else { // not at diag but it exists somewhere in the row, and row has at least one nd entry if (diag_is_ahead()) { // diag is ahead if (k_is_last_nd()) diag = true; // diag is next and last else if (diag_is_next()) { // diag is next and not last diag = true; k++; } else k++; // diag is not next } else { // diag is past if (k_is_last_nd()) End = true; // and we're at the end else k++; // and we're not at the end } } return *this; } RowIterator operator++(int unused) { RowIterator x(*this); ++(*this); return x; } }; // Helper function used only for the RETURN_SIZED_ENUMERATOR macro. Returns the length of // the matrix's storage. static VALUE nm_yale_stored_enumerator_length(VALUE nmatrix) { NM_CONSERVATIVE(nm_register_value(&nmatrix)); YALE_STORAGE* s = NM_STORAGE_YALE(nmatrix); YALE_STORAGE* src = s->src == s ? s : reinterpret_cast(s->src); size_t ia_size = src->shape[0]; // FIXME: This needs to be corrected for slicing. size_t len = std::min( s->shape[0] + s->offset[0], s->shape[1] + s->offset[1] ) + nm_yale_storage_get_size(src) - ia_size; NM_CONSERVATIVE(nm_unregister_value(&nmatrix)); return INT2FIX(len); } // Helper function used only for the RETURN_SIZED_ENUMERATOR macro. Returns the length of // the matrix's storage. static VALUE nm_yale_stored_nondiagonal_enumerator_length(VALUE nmatrix) { NM_CONSERVATIVE(nm_register_value(&nmatrix)); YALE_STORAGE* s = NM_STORAGE_YALE(nmatrix); if (s->src != s) s = reinterpret_cast(s->src); // need to get the original storage shape size_t ia_size = s->shape[0]; size_t len = nm_yale_storage_get_size(NM_STORAGE_YALE(nmatrix)) - ia_size; NM_CONSERVATIVE(nm_unregister_value(&nmatrix)); return INT2FIX(len); } // Helper function for diagonal length. 
static VALUE nm_yale_stored_diagonal_enumerator_length(VALUE nmatrix) { NM_CONSERVATIVE(nm_register_value(&nmatrix)); YALE_STORAGE* s = NM_STORAGE_YALE(nmatrix); size_t len = std::min( s->shape[0] + s->offset[0], s->shape[1] + s->offset[1] ); NM_CONSERVATIVE(nm_unregister_value(&nmatrix)); return INT2FIX(len); } // Helper function for full enumerator length. static VALUE nm_yale_enumerator_length(VALUE nmatrix) { NM_CONSERVATIVE(nm_register_value(&nmatrix)); YALE_STORAGE* s = NM_STORAGE_YALE(nmatrix); size_t len = s->shape[0] * s->shape[1]; NM_CONSERVATIVE(nm_unregister_value(&nmatrix)); return INT2FIX(len); } /* * Map the stored values of a matrix in storage order. */ template static VALUE map_stored(VALUE self) { NM_CONSERVATIVE(nm_register_value(&self)); YALE_STORAGE* s = NM_STORAGE_YALE(self); YaleStorage y(s); RETURN_SIZED_ENUMERATOR_PRE NM_CONSERVATIVE(nm_unregister_value(&self)); RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_yale_stored_enumerator_length); YALE_STORAGE* r = y.template alloc_copy(); nm_yale_storage_register(r); NMATRIX* m = nm_create(nm::YALE_STORE, reinterpret_cast(r)); VALUE to_return = Data_Wrap_Struct(CLASS_OF(self), nm_mark, nm_delete, m); nm_yale_storage_unregister(r); NM_CONSERVATIVE(nm_unregister_value(&self)); return to_return; } /* * map_stored which visits the stored entries of two matrices in order. */ template static VALUE map_merged_stored(VALUE left, VALUE right, VALUE init) { nm::YaleStorage l(NM_STORAGE_YALE(left)); nm::YaleStorage r(NM_STORAGE_YALE(right)); VALUE to_return = l.map_merged_stored(CLASS_OF(left), r, init); return to_return; } /* * Iterate over the stored entries in Yale (diagonal and non-diagonal non-zeros) */ template static VALUE each_stored_with_indices(VALUE nm) { NM_CONSERVATIVE(nm_register_value(&nm)); YALE_STORAGE* s = NM_STORAGE_YALE(nm); YaleStorage y(s); // If we don't have a block, return an enumerator. 
RETURN_SIZED_ENUMERATOR_PRE NM_CONSERVATIVE(nm_unregister_value(&nm)); RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_stored_enumerator_length); for (typename YaleStorage::const_stored_diagonal_iterator d = y.csdbegin(); d != y.csdend(); ++d) { rb_yield_values(3, ~d, d.rb_i(), d.rb_j()); } for (typename YaleStorage::const_row_iterator it = y.cribegin(); it != y.criend(); ++it) { for (auto jt = it.ndbegin(); jt != it.ndend(); ++jt) { rb_yield_values(3, ~jt, it.rb_i(), jt.rb_j()); } } NM_CONSERVATIVE(nm_unregister_value(&nm)); return nm; } /* * Iterate over the stored diagonal entries in Yale. */ template static VALUE stored_diagonal_each_with_indices(VALUE nm) { NM_CONSERVATIVE(nm_register_value(&nm)); YALE_STORAGE* s = NM_STORAGE_YALE(nm); YaleStorage y(s); // If we don't have a block, return an enumerator. RETURN_SIZED_ENUMERATOR_PRE NM_CONSERVATIVE(nm_unregister_value(&nm)); RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_stored_diagonal_length); // FIXME: need diagonal length for (typename YaleStorage::const_stored_diagonal_iterator d = y.csdbegin(); d != y.csdend(); ++d) { rb_yield_values(3, ~d, d.rb_i(), d.rb_j()); } NM_CONSERVATIVE(nm_unregister_value(&nm)); return nm; } /* * Iterate over the stored diagonal entries in Yale. */ template static VALUE stored_nondiagonal_each_with_indices(VALUE nm) { NM_CONSERVATIVE(nm_register_value(&nm)); YALE_STORAGE* s = NM_STORAGE_YALE(nm); YaleStorage y(s); // If we don't have a block, return an enumerator. RETURN_SIZED_ENUMERATOR_PRE NM_CONSERVATIVE(nm_unregister_value(&nm)); RETURN_SIZED_ENUMERATOR(nm, 0, 0, 0); // FIXME: need diagonal length for (typename YaleStorage::const_row_iterator it = y.cribegin(); it != y.criend(); ++it) { for (auto jt = it.ndbegin(); jt != it.ndend(); ++jt) { rb_yield_values(3, ~jt, it.rb_i(), jt.rb_j()); } } NM_CONSERVATIVE(nm_unregister_value(&nm)); return nm; } /* * Iterate over the stored entries in Yale in order of i,j. Visits every diagonal entry, even if it's the default. 
*/ template static VALUE each_ordered_stored_with_indices(VALUE nm) { NM_CONSERVATIVE(nm_register_value(&nm)); YALE_STORAGE* s = NM_STORAGE_YALE(nm); YaleStorage y(s); // If we don't have a block, return an enumerator. RETURN_SIZED_ENUMERATOR_PRE NM_CONSERVATIVE(nm_unregister_value(&nm)); RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_stored_enumerator_length); for (typename YaleStorage::const_row_iterator it = y.cribegin(); it != y.criend(); ++it) { for (auto jt = it.begin(); jt != it.end(); ++jt) { rb_yield_values(3, ~jt, it.rb_i(), jt.rb_j()); } } NM_CONSERVATIVE(nm_unregister_value(&nm)); return nm; } template static VALUE each_with_indices(VALUE nm) { NM_CONSERVATIVE(nm_register_value(&nm)); YALE_STORAGE* s = NM_STORAGE_YALE(nm); YaleStorage y(s); // If we don't have a block, return an enumerator. RETURN_SIZED_ENUMERATOR_PRE NM_CONSERVATIVE(nm_unregister_value(&nm)); RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_enumerator_length); for (typename YaleStorage::const_iterator iter = y.cbegin(); iter != y.cend(); ++iter) { rb_yield_values(3, ~iter, iter.rb_i(), iter.rb_j()); } NM_CONSERVATIVE(nm_unregister_value(&nm)); return nm; } template static bool is_pos_default_value(YALE_STORAGE* s, size_t apos) { YaleStorage y(s); return y.is_pos_default_value(apos); } } // end of namespace nm::yale_storage } // end of namespace nm. /////////////////// // Ruby Bindings // /////////////////// /* These bindings are mostly only for debugging Yale. They are called from Init_nmatrix. */ extern "C" { void nm_init_yale_functions() { /* * This module stores methods that are useful for debugging Yale matrices, * i.e. the ones with +:yale+ stype. */ cNMatrix_YaleFunctions = rb_define_module_under(cNMatrix, "YaleFunctions"); // Expert recommendation. Eventually this should go in a separate gem, or at least a separate module. rb_define_method(cNMatrix_YaleFunctions, "yale_row_keys_intersection", (METHOD)nm_row_keys_intersection, 3); // Debugging functions. 
rb_define_method(cNMatrix_YaleFunctions, "yale_ija", (METHOD)nm_ija, -1); rb_define_method(cNMatrix_YaleFunctions, "yale_a", (METHOD)nm_a, -1); rb_define_method(cNMatrix_YaleFunctions, "yale_size", (METHOD)nm_size, 0); rb_define_method(cNMatrix_YaleFunctions, "yale_ia", (METHOD)nm_ia, 0); rb_define_method(cNMatrix_YaleFunctions, "yale_ja", (METHOD)nm_ja, 0); rb_define_method(cNMatrix_YaleFunctions, "yale_d", (METHOD)nm_d, -1); rb_define_method(cNMatrix_YaleFunctions, "yale_lu", (METHOD)nm_lu, 0); rb_define_method(cNMatrix_YaleFunctions, "yale_nd_row", (METHOD)nm_nd_row, -1); /* Document-const: * Defines the growth rate of the sparse NMatrix's size. Default is 1.5. */ rb_define_const(cNMatrix_YaleFunctions, "YALE_GROWTH_CONSTANT", rb_float_new(nm::yale_storage::GROWTH_CONSTANT)); // This is so the user can easily check the IType size, mostly for debugging. size_t itype_size = sizeof(IType); VALUE itype_dtype; if (itype_size == sizeof(uint64_t)) { itype_dtype = ID2SYM(rb_intern("int64")); } else if (itype_size == sizeof(uint32_t)) { itype_dtype = ID2SYM(rb_intern("int32")); } else if (itype_size == sizeof(uint16_t)) { itype_dtype = ID2SYM(rb_intern("int16")); } else { rb_raise(rb_eStandardError, "unhandled length for sizeof(IType): %lu; note that IType is probably defined as size_t", sizeof(IType)); } rb_define_const(cNMatrix, "INDEX_DTYPE", itype_dtype); } ///////////////// // C ACCESSORS // ///////////////// /* C interface for NMatrix#each_with_indices (Yale) */ VALUE nm_yale_each_with_indices(VALUE nmatrix) { NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::each_with_indices, VALUE, VALUE) return ttable[ NM_DTYPE(nmatrix) ](nmatrix); } /* C interface for NMatrix#each_stored_with_indices (Yale) */ VALUE nm_yale_each_stored_with_indices(VALUE nmatrix) { NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::each_stored_with_indices, VALUE, VALUE) return ttable[ NM_DTYPE(nmatrix) ](nmatrix); } /* Iterate along stored diagonal (not actual diagonal!) 
*/ VALUE nm_yale_stored_diagonal_each_with_indices(VALUE nmatrix) { NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::stored_diagonal_each_with_indices, VALUE, VALUE) return ttable[ NM_DTYPE(nmatrix) ](nmatrix); } /* Iterate through stored nondiagonal (not actual diagonal!) */ VALUE nm_yale_stored_nondiagonal_each_with_indices(VALUE nmatrix) { NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::stored_nondiagonal_each_with_indices, VALUE, VALUE) return ttable[ NM_DTYPE(nmatrix) ](nmatrix); } /* C interface for NMatrix#each_ordered_stored_with_indices (Yale) */ VALUE nm_yale_each_ordered_stored_with_indices(VALUE nmatrix) { NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::each_ordered_stored_with_indices, VALUE, VALUE) return ttable[ NM_DTYPE(nmatrix) ](nmatrix); } /* * C accessor for inserting some value in a matrix (or replacing an existing cell). */ void nm_yale_storage_set(VALUE left, SLICE* slice, VALUE right) { NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::set, void, VALUE left, SLICE* slice, VALUE right); ttable[NM_DTYPE(left)](left, slice, right); } /* * Determine the number of non-diagonal non-zeros in a not-yet-created copy of a slice or matrix. */ static size_t yale_count_slice_copy_ndnz(const YALE_STORAGE* s, size_t* offset, size_t* shape) { NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::count_slice_copy_ndnz, size_t, const YALE_STORAGE*, size_t*, size_t*) return ttable[s->dtype](s, offset, shape); } /* * C accessor for yale_storage::get, which returns a slice of YALE_STORAGE object by copy * * Slicing-related. 
*/ void* nm_yale_storage_get(const STORAGE* storage, SLICE* slice) { YALE_STORAGE* casted_storage = (YALE_STORAGE*)storage; if (slice->single) { NAMED_DTYPE_TEMPLATE_TABLE(elem_copy_table, nm::yale_storage::get_single, void*, YALE_STORAGE*, SLICE*) return elem_copy_table[casted_storage->dtype](casted_storage, slice); } else { nm_yale_storage_register(casted_storage); //return reinterpret_cast(nm::YaleStoragedtype>::type>(casted_storage).alloc_ref(slice)); NAMED_DTYPE_TEMPLATE_TABLE(ref_table, nm::yale_storage::ref, YALE_STORAGE*, YALE_STORAGE* storage, SLICE* slice) YALE_STORAGE* ref = ref_table[casted_storage->dtype](casted_storage, slice); NAMED_LR_DTYPE_TEMPLATE_TABLE(slice_copy_table, nm::yale_storage::slice_copy, YALE_STORAGE*, YALE_STORAGE*) YALE_STORAGE* ns = slice_copy_table[casted_storage->dtype][casted_storage->dtype](ref); NM_FREE(ref); nm_yale_storage_unregister(casted_storage); return ns; } } /* * C accessor for yale_storage::vector_insert */ static char nm_yale_storage_vector_insert(YALE_STORAGE* s, size_t pos, size_t* js, void* vals, size_t n, bool struct_only, nm::dtype_t dtype) { NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::vector_insert, char, YALE_STORAGE*, size_t, size_t*, void*, size_t, bool); return ttable[dtype](s, pos, js, vals, n, struct_only); } /* * C accessor for yale_storage::increment_ia_after, typically called after ::vector_insert */ static void nm_yale_storage_increment_ia_after(YALE_STORAGE* s, size_t ija_size, size_t i, long n) { nm::yale_storage::increment_ia_after(s, ija_size, i, n); } /* * C accessor for yale_storage::ref, which returns either a pointer to the correct location in a YALE_STORAGE object * for some set of coordinates, or a pointer to a single element. 
*/ void* nm_yale_storage_ref(const STORAGE* storage, SLICE* slice) { YALE_STORAGE* casted_storage = (YALE_STORAGE*)storage; if (slice->single) { //return reinterpret_cast(nm::YaleStoragedtype>::type>(casted_storage).get_single_p(slice)); NAMED_DTYPE_TEMPLATE_TABLE(elem_copy_table, nm::yale_storage::get_single, void*, YALE_STORAGE*, SLICE*) return elem_copy_table[casted_storage->dtype](casted_storage, slice); } else { //return reinterpret_cast(nm::YaleStoragedtype>::type>(casted_storage).alloc_ref(slice)); NAMED_DTYPE_TEMPLATE_TABLE(ref_table, nm::yale_storage::ref, YALE_STORAGE*, YALE_STORAGE* storage, SLICE* slice) return reinterpret_cast(ref_table[casted_storage->dtype](casted_storage, slice)); } } /* * C accessor for determining whether two YALE_STORAGE objects have the same contents. */ bool nm_yale_storage_eqeq(const STORAGE* left, const STORAGE* right) { NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::eqeq, bool, const YALE_STORAGE* left, const YALE_STORAGE* right); const YALE_STORAGE* casted_left = reinterpret_cast(left); return ttable[casted_left->dtype][right->dtype](casted_left, (const YALE_STORAGE*)right); } /* * Copy constructor for changing dtypes. (C accessor) */ STORAGE* nm_yale_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype, void* dummy) { NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::cast_copy, YALE_STORAGE*, const YALE_STORAGE* rhs); const YALE_STORAGE* casted_rhs = reinterpret_cast(rhs); //return reinterpret_cast(nm::YaleStoragedtype >::type>(rhs).alloc_copy::type>()); return (STORAGE*)ttable[new_dtype][casted_rhs->dtype](casted_rhs); } /* * Returns size of Yale storage as a size_t (no matter what the itype is). (C accessor) */ size_t nm_yale_storage_get_size(const YALE_STORAGE* storage) { return nm::yale_storage::get_size(storage); } /* * Return a pointer to the matrix's default value entry. 
*/ static void* default_value_ptr(const YALE_STORAGE* s) { return reinterpret_cast(reinterpret_cast(((YALE_STORAGE*)(s->src))->a) + (((YALE_STORAGE*)(s->src))->shape[0] * DTYPE_SIZES[s->dtype])); } /* * Return the Ruby object at a given location in storage. */ static VALUE obj_at(YALE_STORAGE* s, size_t k) { if (s->dtype == nm::RUBYOBJ) return reinterpret_cast(((YALE_STORAGE*)(s->src))->a)[k]; else return nm::rubyobj_from_cval(reinterpret_cast(reinterpret_cast(((YALE_STORAGE*)(s->src))->a) + k * DTYPE_SIZES[s->dtype]), s->dtype).rval; } /* * Return the matrix's default value as a Ruby VALUE. */ static VALUE default_value(const YALE_STORAGE* s) { if (s->dtype == nm::RUBYOBJ) return *reinterpret_cast(default_value_ptr(s)); else return nm::rubyobj_from_cval(default_value_ptr(s), s->dtype).rval; } /* * Check to see if a default value is some form of zero. Easy for non-Ruby object matrices, which should always be 0. */ static bool default_value_is_numeric_zero(const YALE_STORAGE* s) { return rb_funcall(default_value(s), rb_intern("=="), 1, INT2FIX(0)) == Qtrue; } /* * Transposing copy constructor. */ STORAGE* nm_yale_storage_copy_transposed(const STORAGE* rhs_base) { YALE_STORAGE* rhs = (YALE_STORAGE*)rhs_base; NAMED_DTYPE_TEMPLATE_TABLE(transp, nm::yale_storage::copy_transposed, YALE_STORAGE*, YALE_STORAGE*) return (STORAGE*)(transp[rhs->dtype](rhs)); } /* * C accessor for multiplying two YALE_STORAGE matrices, which have already been casted to the same dtype. * * FIXME: There should be some mathematical way to determine the worst-case IType based on the input ITypes. Right now * it just uses the default. 
*/ STORAGE* nm_yale_storage_matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector) { DTYPE_TEMPLATE_TABLE(nm::yale_storage::matrix_multiply, STORAGE*, const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector); YALE_STORAGE* left = reinterpret_cast(casted_storage.left); YALE_STORAGE* right = reinterpret_cast(casted_storage.right); if (!default_value_is_numeric_zero(left) || !default_value_is_numeric_zero(right)) { rb_raise(rb_eNotImpError, "matrix default value must be some form of zero (not false or nil) for multiplication"); return NULL; } return ttable[left->dtype](casted_storage, resulting_shape, vector); } /////////////// // Lifecycle // /////////////// /* * C accessor function for creating a YALE_STORAGE object. Prior to calling this function, you MUST * allocate shape (should be size_t * 2) -- don't use use a regular size_t array! * * For this type, dim must always be 2. The final argument is the initial capacity with which to * create the storage. */ YALE_STORAGE* nm_yale_storage_create(nm::dtype_t dtype, size_t* shape, size_t dim, size_t init_capacity) { if (dim != 2) { rb_raise(nm_eStorageTypeError, "yale supports only 2-dimensional matrices"); } DTYPE_OBJECT_STATIC_TABLE(nm::YaleStorage, create, YALE_STORAGE*, size_t* shape, size_t init_capacity) return ttable[dtype](shape, init_capacity); } /* * Destructor for yale storage (C-accessible). */ void nm_yale_storage_delete(STORAGE* s) { if (s) { YALE_STORAGE* storage = (YALE_STORAGE*)s; if (storage->count-- == 1) { NM_FREE(storage->shape); NM_FREE(storage->offset); NM_FREE(storage->ija); NM_FREE(storage->a); NM_FREE(storage); } } } /* * Destructor for the yale storage ref */ void nm_yale_storage_delete_ref(STORAGE* s) { if (s) { YALE_STORAGE* storage = (YALE_STORAGE*)s; nm_yale_storage_delete( reinterpret_cast(storage->src) ); NM_FREE(storage->shape); NM_FREE(storage->offset); NM_FREE(s); } } /* * C accessor for yale_storage::init, a templated function. 
* * Initializes the IJA vector of the YALE_STORAGE matrix. */ void nm_yale_storage_init(YALE_STORAGE* s, void* init_val) { DTYPE_TEMPLATE_TABLE(nm::yale_storage::init, void, YALE_STORAGE*, void*); ttable[s->dtype](s, init_val); } /* * Ruby GC mark function for YALE_STORAGE. C accessible. */ void nm_yale_storage_mark(STORAGE* storage_base) { YALE_STORAGE* storage = (YALE_STORAGE*)storage_base; if (storage && storage->dtype == nm::RUBYOBJ) { VALUE* a = (VALUE*)(storage->a); rb_gc_mark_locations(a, &(a[storage->capacity-1])); } } void nm_yale_storage_register_a(void* a, size_t size) { nm_register_values(reinterpret_cast(a), size); } void nm_yale_storage_unregister_a(void* a, size_t size) { nm_unregister_values(reinterpret_cast(a), size); } void nm_yale_storage_register(const STORAGE* s) { const YALE_STORAGE* y = reinterpret_cast(s); if (y->dtype == nm::RUBYOBJ) { nm_register_values(reinterpret_cast(y->a), nm::yale_storage::get_size(y)); } } void nm_yale_storage_unregister(const STORAGE* s) { const YALE_STORAGE* y = reinterpret_cast(s); if (y->dtype == nm::RUBYOBJ) { nm_unregister_values(reinterpret_cast(y->a), nm::yale_storage::get_size(y)); } } /* * Allocates and initializes the basic struct (but not the IJA or A vectors). * * This function is ONLY used when creating from old yale. 
*/ static YALE_STORAGE* alloc(nm::dtype_t dtype, size_t* shape, size_t dim) { YALE_STORAGE* s; s = NM_ALLOC( YALE_STORAGE ); s->ndnz = 0; s->dtype = dtype; s->shape = shape; s->offset = NM_ALLOC_N(size_t, dim); for (size_t i = 0; i < dim; ++i) s->offset[i] = 0; s->dim = dim; s->src = reinterpret_cast(s); s->count = 1; return s; } YALE_STORAGE* nm_yale_storage_create_from_old_yale(nm::dtype_t dtype, size_t* shape, char* ia, char* ja, char* a, nm::dtype_t from_dtype) { NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::create_from_old_yale, YALE_STORAGE*, nm::dtype_t dtype, size_t* shape, char* r_ia, char* r_ja, char* r_a); return ttable[dtype][from_dtype](dtype, shape, ia, ja, a); } ////////////////////////////////////////////// // YALE-SPECIFIC FUNCTIONS (RUBY ACCESSORS) // ////////////////////////////////////////////// /* * call-seq: * yale_size -> Integer * * Get the size of a Yale matrix (the number of elements actually stored). * * For capacity (the maximum number of elements that can be stored without a resize), use capacity instead. */ static VALUE nm_size(VALUE self) { YALE_STORAGE* s = (YALE_STORAGE*)(NM_SRC(self)); VALUE to_return = INT2FIX(nm::yale_storage::IJA(s)[s->shape[0]]); return to_return; } /* * Determine if some pos in the diagonal is the default. No bounds checking! */ static bool is_pos_default_value(YALE_STORAGE* s, size_t apos) { DTYPE_TEMPLATE_TABLE(nm::yale_storage::is_pos_default_value, bool, YALE_STORAGE*, size_t) return ttable[s->dtype](s, apos); } /* * call-seq: * yale_row_keys_intersection(i, m2, i2) -> Array * * This function is experimental. * * It finds the intersection of row i of the current matrix with row i2 of matrix m2. * Both matrices must be Yale. They may not be slices. * * Only checks the stored indices; does not care about matrix default value. 
*/
static VALUE nm_row_keys_intersection(VALUE m1, VALUE ii1, VALUE m2, VALUE ii2) {
  NM_CONSERVATIVE(nm_register_value(&m1));
  NM_CONSERVATIVE(nm_register_value(&m2));

  // Slices don't expose raw ija arrays the way this function needs.
  if (NM_SRC(m1) != NM_STORAGE(m1) || NM_SRC(m2) != NM_STORAGE(m2)) {
    NM_CONSERVATIVE(nm_unregister_value(&m2));
    NM_CONSERVATIVE(nm_unregister_value(&m1));
    rb_raise(rb_eNotImpError, "must be called on a real matrix and not a slice");
  }

  size_t i1 = FIX2INT(ii1),
         i2 = FIX2INT(ii2);

  YALE_STORAGE *s = NM_STORAGE_YALE(m1),
               *t = NM_STORAGE_YALE(m2);

  // Row boundaries in the (sorted, non-diagonal) JA portion of each matrix.
  size_t pos1 = s->ija[i1],
         pos2 = t->ija[i2];
  size_t nextpos1 = s->ija[i1+1],
         nextpos2 = t->ija[i2+1];
  size_t diff1 = nextpos1 - pos1,
         diff2 = nextpos2 - pos2;

  // Does the diagonal have a nonzero in it? (Diagonal entries are stored
  // separately in Yale, so they need explicit handling below.)
  bool diag1 = i1 < s->shape[0] && !is_pos_default_value(s, i1),
       diag2 = i2 < t->shape[0] && !is_pos_default_value(t, i2);

  // Reserve max(diff1,diff2) space -- that's the max intersection possible.
  VALUE ret = rb_ary_new2(std::max(diff1,diff2)+1);
  nm_register_value(&ret);

  // Handle once the special case where both have the diagonal in exactly
  // the same place.
  if (diag1 && diag2 && i1 == i2) {
    rb_ary_push(ret, INT2FIX(i1));
    diag1 = false;
    diag2 = false; // no need to deal with diagonals anymore.
  }

  // Now find the intersection: standard sorted-merge walk over both rows'
  // column indices, interleaving each matrix's diagonal column when reached.
  size_t idx1 = pos1, idx2 = pos2;
  while (idx1 < nextpos1 && idx2 < nextpos2) {
    if (s->ija[idx1] == t->ija[idx2]) {
      // Same column stored in both rows.
      rb_ary_push(ret, INT2FIX(s->ija[idx1]));
      ++idx1;
      ++idx2;
    } else if (diag1 && i1 == t->ija[idx2]) {
      // m2 stores a column matching m1's (separately-stored) diagonal.
      rb_ary_push(ret, INT2FIX(i1));
      diag1 = false;
      ++idx2;
    } else if (diag2 && i2 == s->ija[idx1]) {
      // m1 stores a column matching m2's diagonal.
      rb_ary_push(ret, INT2FIX(i2));
      diag2 = false;
      ++idx1;
    } else if (s->ija[idx1] < t->ija[idx2]) {
      ++idx1;
    } else { // s->ija[idx1] > t->ija[idx2]
      ++idx2;
    }
  }

  // Past the end of row i2's stored entries; need to try to find diagonal
  if (diag2 && idx1 < nextpos1) {
    idx1 = nm::yale_storage::binary_search_left_boundary(s, idx1, nextpos1, i2);
    if (s->ija[idx1] == i2) rb_ary_push(ret, INT2FIX(i2));
  }

  // Find the diagonal, if possible, in the other one.
  if (diag1 && idx2 < nextpos2) {
    idx2 = nm::yale_storage::binary_search_left_boundary(t, idx2, nextpos2, i1);
    if (t->ija[idx2] == i1) rb_ary_push(ret, INT2FIX(i1));
  }

  nm_unregister_value(&ret);
  NM_CONSERVATIVE(nm_unregister_value(&m1));
  NM_CONSERVATIVE(nm_unregister_value(&m2));

  return ret;
}

/*
 * call-seq:
 *     yale_a -> Array
 *     yale_d(index) -> ...
 *
 * Get the A array of a Yale matrix (which stores the diagonal and the LU portions of the matrix).
*/
static VALUE nm_a(int argc, VALUE* argv, VALUE self) {
  NM_CONSERVATIVE(nm_register_value(&self));
  VALUE idx;
  // Optional single argument: an index into A.
  rb_scan_args(argc, argv, "01", &idx);
  NM_CONSERVATIVE(nm_register_value(&idx));

  YALE_STORAGE* s = reinterpret_cast<YALE_STORAGE*>(NM_SRC(self));
  size_t size = nm_yale_storage_get_size(s);

  if (idx == Qnil) {
    // No index: return the whole A array, padded to capacity with nils.
    VALUE* vals = NM_ALLOCA_N(VALUE, size);

    nm_register_values(vals, size);

    if (NM_DTYPE(self) == nm::RUBYOBJ) {
      for (size_t i = 0; i < size; ++i) {
        vals[i] = reinterpret_cast<VALUE*>(s->a)[i];
      }
    } else {
      for (size_t i = 0; i < size; ++i) {
        vals[i] = nm::rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*i, s->dtype).rval;
      }
    }
    VALUE ary = rb_ary_new4(size, vals);

    for (size_t i = size; i < s->capacity; ++i)
      rb_ary_push(ary, Qnil);

    nm_unregister_values(vals, size);
    NM_CONSERVATIVE(nm_unregister_value(&idx));
    NM_CONSERVATIVE(nm_unregister_value(&self));
    return ary;
  } else {
    size_t index = FIX2INT(idx);
    NM_CONSERVATIVE(nm_unregister_value(&idx));
    NM_CONSERVATIVE(nm_unregister_value(&self));
    if (index >= size) rb_raise(rb_eRangeError, "out of range");
    return nm::rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype] * index, s->dtype).rval;
  }
}

/*
 * call-seq:
 *     yale_d -> Array
 *     yale_d(index) -> ...
 *
 * Get the diagonal ("D") portion of the A array of a Yale matrix.
*/
static VALUE nm_d(int argc, VALUE* argv, VALUE self) {
  NM_CONSERVATIVE(nm_register_value(&self));
  VALUE idx;
  // Optional single argument: an index into the diagonal.
  rb_scan_args(argc, argv, "01", &idx);
  NM_CONSERVATIVE(nm_register_value(&idx));

  YALE_STORAGE* s = reinterpret_cast<YALE_STORAGE*>(NM_SRC(self));

  if (idx == Qnil) {
    // No index: return the first shape[0] entries of A (the diagonal).
    VALUE* vals = NM_ALLOCA_N(VALUE, s->shape[0]);

    nm_register_values(vals, s->shape[0]);

    if (NM_DTYPE(self) == nm::RUBYOBJ) {
      for (size_t i = 0; i < s->shape[0]; ++i) {
        vals[i] = reinterpret_cast<VALUE*>(s->a)[i];
      }
    } else {
      for (size_t i = 0; i < s->shape[0]; ++i) {
        vals[i] = nm::rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*i, s->dtype).rval;
      }
    }

    // Build the array BEFORE unregistering vals: rb_ary_new4 may allocate and
    // trigger GC, and the VALUEs in vals must stay protected until consumed.
    // (Matches the ordering used in nm_a.)
    VALUE ary = rb_ary_new4(s->shape[0], vals);

    nm_unregister_values(vals, s->shape[0]);
    NM_CONSERVATIVE(nm_unregister_value(&idx));
    NM_CONSERVATIVE(nm_unregister_value(&self));

    return ary;
  } else {
    size_t index = FIX2INT(idx);
    NM_CONSERVATIVE(nm_unregister_value(&idx));
    NM_CONSERVATIVE(nm_unregister_value(&self));
    if (index >= s->shape[0]) rb_raise(rb_eRangeError, "out of range");
    return nm::rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype] * index, s->dtype).rval;
  }
}

/*
 * call-seq:
 *     yale_lu -> Array
 *
 * Get the non-diagonal ("LU") portion of the A array of a Yale matrix.
*/
static VALUE nm_lu(VALUE self) {
  NM_CONSERVATIVE(nm_register_value(&self));
  YALE_STORAGE* s = reinterpret_cast<YALE_STORAGE*>(NM_SRC(self));

  size_t size = nm_yale_storage_get_size(s);

  // LU is everything in A past the diagonal and the default-value slot,
  // i.e. size - shape[0] - 1 entries.
  VALUE* vals = NM_ALLOCA_N(VALUE, size - s->shape[0] - 1);

  nm_register_values(vals, size - s->shape[0] - 1);

  if (NM_DTYPE(self) == nm::RUBYOBJ) {
    for (size_t i = 0; i < size - s->shape[0] - 1; ++i) {
      vals[i] = reinterpret_cast<VALUE*>(s->a)[s->shape[0] + 1 + i];
    }
  } else {
    for (size_t i = 0; i < size - s->shape[0] - 1; ++i) {
      vals[i] = nm::rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*(s->shape[0] + 1 + i), s->dtype).rval;
    }
  }

  VALUE ary = rb_ary_new4(size - s->shape[0] - 1, vals);

  // Pad out to capacity with nils, mirroring nm_a.
  for (size_t i = size; i < s->capacity; ++i)
    rb_ary_push(ary, Qnil);

  nm_unregister_values(vals, size - s->shape[0] - 1);
  NM_CONSERVATIVE(nm_unregister_value(&self));

  return ary;
}

/*
 * call-seq:
 *     yale_ia -> Array
 *
 * Get the IA portion of the IJA array of a Yale matrix. This gives the start and end positions of rows in the
 * JA and LU portions of the IJA and A arrays, respectively.
 */
static VALUE nm_ia(VALUE self) {
  NM_CONSERVATIVE(nm_register_value(&self));
  YALE_STORAGE* s = reinterpret_cast<YALE_STORAGE*>(NM_SRC(self));

  // INT2FIX produces immediates; no GC registration needed for vals here.
  VALUE* vals = NM_ALLOCA_N(VALUE, s->shape[0] + 1);

  for (size_t i = 0; i < s->shape[0] + 1; ++i) {
    vals[i] = INT2FIX(s->ija[i]);
  }

  NM_CONSERVATIVE(nm_unregister_value(&self));

  return rb_ary_new4(s->shape[0]+1, vals);
}

/*
 * call-seq:
 *     yale_ja -> Array
 *
 * Get the JA portion of the IJA array of a Yale matrix. This gives the column indices for entries in corresponding
 * positions in the LU portion of the A array.
*/
static VALUE nm_ja(VALUE self) {
  NM_CONSERVATIVE(nm_register_value(&self));

  YALE_STORAGE* s = reinterpret_cast<YALE_STORAGE*>(NM_SRC(self));

  size_t size = nm_yale_storage_get_size(s);

  // JA is the column-index portion of IJA, past the IA section (shape[0]+1 entries).
  VALUE* vals = NM_ALLOCA_N(VALUE, size - s->shape[0] - 1);

  nm_register_values(vals, size - s->shape[0] - 1);

  for (size_t i = 0; i < size - s->shape[0] - 1; ++i) {
    vals[i] = INT2FIX(s->ija[s->shape[0] + 1 + i]);
  }

  VALUE ary = rb_ary_new4(size - s->shape[0] - 1, vals);

  // Pad out to capacity with nils, mirroring nm_a.
  for (size_t i = size; i < s->capacity; ++i)
    rb_ary_push(ary, Qnil);

  nm_unregister_values(vals, size - s->shape[0] - 1);
  NM_CONSERVATIVE(nm_unregister_value(&self));

  return ary;
}

/*
 * call-seq:
 *     yale_ija -> Array
 *     yale_ija(index) -> ...
 *
 * Get the IJA array of a Yale matrix (or a component of the IJA array).
 */
static VALUE nm_ija(int argc, VALUE* argv, VALUE self) {
  NM_CONSERVATIVE(nm_register_value(&self));
  VALUE idx;
  rb_scan_args(argc, argv, "01", &idx);
  NM_CONSERVATIVE(nm_register_value(&idx));

  YALE_STORAGE* s = reinterpret_cast<YALE_STORAGE*>(NM_SRC(self));
  size_t size = nm_yale_storage_get_size(s);

  if (idx == Qnil) {
    // No index: return the whole IJA array, padded to capacity with nils.
    VALUE* vals = NM_ALLOCA_N(VALUE, size);

    nm_register_values(vals, size);

    for (size_t i = 0; i < size; ++i) {
      vals[i] = INT2FIX(s->ija[i]);
    }

    VALUE ary = rb_ary_new4(size, vals);

    for (size_t i = size; i < s->capacity; ++i)
      rb_ary_push(ary, Qnil);

    nm_unregister_values(vals, size);
    NM_CONSERVATIVE(nm_unregister_value(&idx));
    NM_CONSERVATIVE(nm_unregister_value(&self));

    return ary;
  } else {
    size_t index = FIX2INT(idx);
    if (index >= size) rb_raise(rb_eRangeError, "out of range");
    NM_CONSERVATIVE(nm_unregister_value(&self));
    NM_CONSERVATIVE(nm_unregister_value(&idx));
    return INT2FIX(s->ija[index]);
  }
}

/*
 * call-seq:
 *     yale_nd_row -> ...
 *
 * This function gets the non-diagonal contents of a Yale matrix row.
 * The first argument should be the row index. The optional second argument may be :hash or :keys, but defaults
 * to :hash. If :keys is given, it will only return the Hash keys (the column indices).
* * This function is meant to accomplish its purpose as efficiently as possible. It does not check for appropriate * range. */ static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self) { NM_CONSERVATIVE(nm_register_value(&self)); if (NM_SRC(self) != NM_STORAGE(self)) { NM_CONSERVATIVE(nm_unregister_value(&self)); rb_raise(rb_eNotImpError, "must be called on a real matrix and not a slice"); } VALUE i_, as; rb_scan_args(argc, argv, "11", &i_, &as); NM_CONSERVATIVE(nm_register_value(&as)); NM_CONSERVATIVE(nm_register_value(&i_)); bool keys = false; if (as != Qnil && rb_to_id(as) != nm_rb_hash) keys = true; size_t i = FIX2INT(i_); YALE_STORAGE* s = NM_STORAGE_YALE(self); //nm::dtype_t dtype = NM_DTYPE(self); if (i >= s->shape[0]) { NM_CONSERVATIVE(nm_unregister_value(&self)); NM_CONSERVATIVE(nm_unregister_value(&as)); NM_CONSERVATIVE(nm_unregister_value(&i_)); rb_raise(rb_eRangeError, "out of range (%lu >= %lu)", i, s->shape[0]); } size_t pos = s->ija[i]; size_t nextpos = s->ija[i+1]; size_t diff = nextpos - pos; VALUE ret; if (keys) { ret = rb_ary_new3(diff); for (size_t idx = pos; idx < nextpos; ++idx) { rb_ary_store(ret, idx - pos, INT2FIX(s->ija[idx])); } } else { ret = rb_hash_new(); for (size_t idx = pos; idx < nextpos; ++idx) { rb_hash_aset(ret, INT2FIX(s->ija[idx]), nm::rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*idx, s->dtype).rval); } } NM_CONSERVATIVE(nm_unregister_value(&as)); NM_CONSERVATIVE(nm_unregister_value(&i_)); NM_CONSERVATIVE(nm_unregister_value(&self)); return ret; } /* * call-seq: * yale_vector_set(i, column_index_array, cell_contents_array, pos) -> Fixnum * * Insert at position pos an array of non-diagonal elements with column indices given. Note that the column indices and values * must be storage-contiguous -- that is, you can't insert them around existing elements in some row, only amid some * elements in some row. You *can* insert them around a diagonal element, since this is stored separately. 
This function * may not be used for the insertion of diagonal elements in most cases, as these are already present in the data * structure and are typically modified by replacement rather than insertion. * * The last argument, pos, may be nil if you want to insert at the beginning of a row. Otherwise it needs to be provided. * Don't expect this function to know the difference. It really does very little checking, because its goal is to make * multiple contiguous insertion as quick as possible. * * You should also not attempt to insert values which are the default (0). These are not supposed to be stored, and may * lead to undefined behavior. * * Example: * m.yale_vector_set(3, [0,3,4], [1,1,1], 15) * * The example above inserts the values 1, 1, and 1 in columns 0, 3, and 4, assumed to be located at position 15 (which * corresponds to row 3). * * Example: * next = m.yale_vector_set(3, [0,3,4], [1,1,1]) * * This example determines that i=3 is at position 15 automatically. The value returned, next, is the position where the * next value(s) should be inserted. 
*/ VALUE nm_vector_set(int argc, VALUE* argv, VALUE self) { //, VALUE i_, VALUE jv, VALUE vv, VALUE pos_) { NM_CONSERVATIVE(nm_register_value(&self)); if (NM_SRC(self) != NM_STORAGE(self)) { NM_CONSERVATIVE(nm_unregister_value(&self)); rb_raise(rb_eNotImpError, "must be called on a real matrix and not a slice"); } // i, jv, vv are mandatory; pos is optional; thus "31" VALUE i_, jv, vv, pos_; rb_scan_args(argc, argv, "31", &i_, &jv, &vv, &pos_); NM_CONSERVATIVE(nm_register_value(&i_)); NM_CONSERVATIVE(nm_register_value(&jv)); NM_CONSERVATIVE(nm_register_value(&vv)); NM_CONSERVATIVE(nm_register_value(&pos_)); size_t len = RARRAY_LEN(jv); // need length in order to read the arrays in size_t vvlen = RARRAY_LEN(vv); if (len != vvlen) { NM_CONSERVATIVE(nm_unregister_value(&pos_)); NM_CONSERVATIVE(nm_unregister_value(&vv)); NM_CONSERVATIVE(nm_unregister_value(&jv)); NM_CONSERVATIVE(nm_unregister_value(&i_)); NM_CONSERVATIVE(nm_unregister_value(&self)); rb_raise(rb_eArgError, "lengths must match between j array (%lu) and value array (%lu)", len, vvlen); } YALE_STORAGE* s = NM_STORAGE_YALE(self); nm::dtype_t dtype = NM_DTYPE(self); size_t i = FIX2INT(i_); // get the row size_t pos = s->ija[i]; // Allocate the j array and the values array size_t* j = NM_ALLOCA_N(size_t, len); void* vals = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * len); if (dtype == nm::RUBYOBJ){ nm_register_values(reinterpret_cast(vals), len); } // Copy array contents for (size_t idx = 0; idx < len; ++idx) { j[idx] = FIX2INT(rb_ary_entry(jv, idx)); rubyval_to_cval(rb_ary_entry(vv, idx), dtype, (char*)vals + idx * DTYPE_SIZES[dtype]); } nm_yale_storage_vector_insert(s, pos, j, vals, len, false, dtype); nm_yale_storage_increment_ia_after(s, s->shape[0], i, len); s->ndnz += len; if (dtype == nm::RUBYOBJ){ nm_unregister_values(reinterpret_cast(vals), len); } NM_CONSERVATIVE(nm_unregister_value(&pos_)); NM_CONSERVATIVE(nm_unregister_value(&vv)); NM_CONSERVATIVE(nm_unregister_value(&jv)); 
NM_CONSERVATIVE(nm_unregister_value(&i_)); NM_CONSERVATIVE(nm_unregister_value(&self)); // Return the updated position pos += len; return INT2FIX(pos); } /* * call-seq: * __yale_default_value__ -> ... * * Get the default_value property from a yale matrix. */ VALUE nm_yale_default_value(VALUE self) { VALUE to_return = default_value(NM_STORAGE_YALE(self)); return to_return; } /* * call-seq: * __yale_map_merged_stored__(right) -> Enumerator * * A map operation on two Yale matrices which only iterates across the stored indices. */ VALUE nm_yale_map_merged_stored(VALUE left, VALUE right, VALUE init) { NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::map_merged_stored, VALUE, VALUE, VALUE, VALUE) return ttable[NM_DTYPE(left)][NM_DTYPE(right)](left, right, init); //return nm::yale_storage::map_merged_stored(left, right, init); } /* * call-seq: * __yale_map_stored__ -> Enumerator * * A map operation on two Yale matrices which only iterates across the stored indices. */ VALUE nm_yale_map_stored(VALUE self) { NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::map_stored, VALUE, VALUE) return ttable[NM_DTYPE(self)](self); } } // end of extern "C" block ================================================ FILE: ext/nmatrix/storage/yale/yale.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. 
//
// == Contributing
//
// By contributing source code to SciRuby, you agree to be bound by
// our Contributor Agreement:
//
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
//
// == yale.h
//
// "new yale" storage format for 2D matrices (like yale, but with
// the diagonal pulled out for O(1) access).
//
// Specifications:
// * dtype and index dtype must necessarily differ
// * index dtype is defined by whatever unsigned type can store
//   max(rows,cols)
// * that means vector ija stores only index dtype, but a stores
//   dtype
// * vectors must be able to grow as necessary
// * maximum size is rows*cols+1

#ifndef YALE_H
#define YALE_H

/*
 * Standard Includes
 */

// NOTE(review): the targets of the three includes below were lost during
// extraction (angle-bracket contents stripped) -- confirm against upstream.
#include
#include // for std::numeric_limits<...>::max()
#include

/*
 * Project Includes
 */

#include "../../types.h"
#include "../../data/data.h"
#include "../common.h"
#include "../../nmatrix.h"

extern "C" {

  /*
   * Macros
   */

#ifndef NM_CHECK_ALLOC
 #define NM_CHECK_ALLOC(x) if (!x) rb_raise(rb_eNoMemError, "insufficient memory");
#endif

  /*
   * Types
   */

  /*
   * Data
   */

  /*
   * Functions
   */

  ///////////////
  // Lifecycle //
  ///////////////

  YALE_STORAGE* nm_yale_storage_create(nm::dtype_t dtype, size_t* shape, size_t dim, size_t init_capacity);
  YALE_STORAGE* nm_yale_storage_create_from_old_yale(nm::dtype_t dtype, size_t* shape, char* ia, char* ja, char* a, nm::dtype_t from_dtype);
  YALE_STORAGE* nm_yale_storage_create_merged(const YALE_STORAGE* merge_template, const YALE_STORAGE* other);
  void          nm_yale_storage_delete(STORAGE* s);
  void          nm_yale_storage_delete_ref(STORAGE* s);
  void          nm_yale_storage_init(YALE_STORAGE* s, void* default_val);
  void          nm_yale_storage_mark(STORAGE*);
  void          nm_yale_storage_register(const STORAGE* s);
  void          nm_yale_storage_unregister(const STORAGE* s);
  void          nm_yale_storage_register_a(void* a, size_t size);
  void          nm_yale_storage_unregister_a(void* a, size_t size);

  ///////////////
  // Accessors //
  ///////////////

  VALUE nm_yale_each_with_indices(VALUE nmatrix);
  VALUE nm_yale_each_stored_with_indices(VALUE nmatrix);
  VALUE nm_yale_stored_diagonal_each_with_indices(VALUE nmatrix);
  VALUE nm_yale_stored_nondiagonal_each_with_indices(VALUE nmatrix);
  VALUE nm_yale_each_ordered_stored_with_indices(VALUE nmatrix);
  void* nm_yale_storage_get(const STORAGE* s, SLICE* slice);
  void* nm_yale_storage_ref(const STORAGE* s, SLICE* slice);
  void  nm_yale_storage_set(VALUE left, SLICE* slice, VALUE right);

  //char  nm_yale_storage_vector_insert(YALE_STORAGE* s, size_t pos, size_t* js, void* vals, size_t n, bool struct_only, nm::dtype_t dtype, nm::itype_t itype);
  //void  nm_yale_storage_increment_ia_after(YALE_STORAGE* s, size_t ija_size, size_t i, size_t n);

  size_t nm_yale_storage_get_size(const YALE_STORAGE* storage);
  VALUE  nm_yale_default_value(VALUE self);
  VALUE  nm_yale_map_stored(VALUE self);
  VALUE  nm_yale_map_merged_stored(VALUE left, VALUE right, VALUE init);

  ///////////
  // Tests //
  ///////////

  bool nm_yale_storage_eqeq(const STORAGE* left, const STORAGE* right);

  //////////
  // Math //
  //////////

  STORAGE* nm_yale_storage_matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector);

  /////////////
  // Utility //
  /////////////

  /////////////////////////
  // Copying and Casting //
  /////////////////////////

  STORAGE* nm_yale_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype, void*);
  STORAGE* nm_yale_storage_copy_transposed(const STORAGE* rhs_base);

  void nm_init_yale_functions(void);

  VALUE nm_vector_set(int argc, VALUE* argv, VALUE self);

} // end of extern "C" block

namespace nm { namespace yale_storage {

  /*
   * Typedefs
   */

  typedef size_t IType;

  /*
   * Templated Functions
   */

  int binary_search(YALE_STORAGE* s, IType left, IType right, IType key);

  /*
   * Clear out the D portion of the A vector (clearing the diagonal and setting
   * the zero value).
   *
   * Note: This sets a literal 0 value. If your dtype is RUBYOBJ (a Ruby object),
   * it'll actually be INT2FIX(0) instead of a string of NULLs.
You can actually * set a default for Ruby objects other than zero -- you generally want it to * be Qfalse, Qnil, or INT2FIX(0). The last is the default. */ template inline void clear_diagonal_and_zero(YALE_STORAGE* s, void* init_val) { DType* a = reinterpret_cast(s->a); // Clear out the diagonal + one extra entry if (init_val) { for (size_t i = 0; i <= s->shape[0]; ++i) // insert Ruby zeros, falses, or whatever else. a[i] = *reinterpret_cast(init_val); } else { for (size_t i = 0; i <= s->shape[0]; ++i) // insert zeros. a[i] = 0; } } template void init(YALE_STORAGE* s, void* init_val); size_t get_size(const YALE_STORAGE* storage); IType binary_search_left_boundary(const YALE_STORAGE* s, IType left, IType right, IType bound); }} // end of namespace nm::yale_storage #endif // YALE_H ================================================ FILE: ext/nmatrix/types.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == types.h // // Definition of simple types used throughout NMatrix. 
#ifndef NMATRIX_TYPES_H #define NMATRIX_TYPES_H /* * Standard Includes */ #include #include /* * Macros */ #define EPSILON 1E-10 #define FP_IS_ZERO(n) (-EPSILON < n && n < EPSILON) #define FP_EQUAL(a, b) FP_IS_ZERO((a - b)) /* * Types */ typedef float float32_t; typedef double float64_t; typedef size_t IType; #endif ================================================ FILE: ext/nmatrix/util/io.cpp ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == io.cpp // // Input/output support functions. #include "io.h" #include namespace nm { namespace io { const char* const MATLAB_DTYPE_NAMES[NUM_MATLAB_DTYPES] = { "miUNDEFINED0", "miINT8", "miUINT8", "miINT16", "miUINT16", "miINT32", "miUINT32", "miSINGLE", "miRESERVED8", "miDOUBLE", "miRESERVED10", "miRESERVED11", "miINT64", "miUINT64", "miMATRIX" }; const size_t MATLAB_DTYPE_SIZES[NUM_MATLAB_DTYPES] = { 1, // undefined 1, // int8 1, // uint8 2, // int16 2, // uint16 4, // int32 4, // uint32 sizeof(float), 1, // reserved sizeof(double), 1, // reserved 1, // reserved 8, // int64 8, // uint64 1 // matlab array? }; /* * Templated function for converting from MATLAB dtypes to NMatrix dtypes. 
*/ template char* matlab_cstring_to_dtype_string(size_t& result_len, const char* str, size_t bytes) { result_len = sizeof(DType) * bytes / sizeof(MDType); char* result = NM_ALLOC_N(char, result_len); if (bytes % sizeof(MDType) != 0) { rb_raise(rb_eArgError, "the given string does not divide evenly for the given MATLAB dtype"); } for (size_t i = 0, j = 0; i < bytes; i += sizeof(MDType), j += sizeof(DType)) { *reinterpret_cast(result+j) = (DType)(*reinterpret_cast(str + i)); } return result; } }} // end of namespace nm::io extern "C" { /////////////////////// // Utility Functions // /////////////////////// /* * Converts a string to a data type. */ nm::dtype_t nm_dtype_from_rbstring(VALUE str) { for (size_t index = 0; index < NM_NUM_DTYPES; ++index) { if (!std::strncmp(RSTRING_PTR(str), DTYPE_NAMES[index], RSTRING_LEN(str))) { return static_cast(index); } } rb_raise(rb_eArgError, "invalid data type string (%s) specified", RSTRING_PTR(str)); } /* * Converts a symbol to a data type. */ nm::dtype_t nm_dtype_from_rbsymbol(VALUE sym) { ID sym_id = SYM2ID(sym); for (size_t index = 0; index < NM_NUM_DTYPES; ++index) { if (sym_id == rb_intern(DTYPE_NAMES[index])) { return static_cast(index); } } VALUE str = rb_any_to_s(sym); rb_raise(rb_eArgError, "invalid data type symbol (:%s) specified", RSTRING_PTR(str)); } /* * Converts a string to a storage type. Only looks at the first three * characters. */ nm::stype_t nm_stype_from_rbstring(VALUE str) { for (size_t index = 0; index < NM_NUM_STYPES; ++index) { if (!std::strncmp(RSTRING_PTR(str), STYPE_NAMES[index], 3)) { return static_cast(index); } } rb_raise(rb_eArgError, "Invalid storage type string specified"); return nm::DENSE_STORE; } /* * Converts a symbol to a storage type. 
*/ nm::stype_t nm_stype_from_rbsymbol(VALUE sym) { for (size_t index = 0; index < NM_NUM_STYPES; ++index) { if (SYM2ID(sym) == rb_intern(STYPE_NAMES[index])) { return static_cast(index); } } VALUE str = rb_any_to_s(sym); rb_raise(rb_eArgError, "invalid storage type symbol (:%s) specified", RSTRING_PTR(str)); return nm::DENSE_STORE; } /* * Converts a MATLAB data-type symbol to an enum. */ static nm::io::matlab_dtype_t matlab_dtype_from_rbsymbol(VALUE sym) { for (size_t index = 0; index < nm::io::NUM_MATLAB_DTYPES; ++index) { if (SYM2ID(sym) == rb_intern(nm::io::MATLAB_DTYPE_NAMES[index])) { return static_cast(index); } } rb_raise(rb_eArgError, "Invalid matlab type specified."); } /* * Take a string of bytes which represent MATLAB data type values and repack them into a string * of bytes representing values of an NMatrix dtype (or itype). * * Returns what appears to be a Ruby String. * * Arguments: * * str :: the data * * from :: symbol representing MATLAB data type (e.g., :miINT8) * * type :: either :itype or some dtype symbol (:byte, :uint32, etc) */ static VALUE nm_rbstring_matlab_repack(VALUE self, VALUE str, VALUE from, VALUE type) { nm::io::matlab_dtype_t from_type = matlab_dtype_from_rbsymbol(from); uint8_t to_type; if (SYMBOL_P(type)) { if (rb_to_id(type) == rb_intern("itype")) { if (sizeof(size_t) == sizeof(int64_t)) { to_type = static_cast(nm::INT64); } else if (sizeof(size_t) == sizeof(int32_t)) { to_type = static_cast(nm::INT32); } else if (sizeof(size_t) == sizeof(int16_t)) { to_type = static_cast(nm::INT16); } else { rb_raise(rb_eStandardError, "unhandled size_t definition"); } } else { to_type = static_cast(nm_dtype_from_rbsymbol(type)); } } else { rb_raise(rb_eArgError, "expected symbol for third argument"); } // For next few lines, see explanation above NM_MATLAB_DTYPE_TEMPLATE_TABLE definition in io.h. 
if (to_type >= static_cast(nm::COMPLEX64)) { rb_raise(rb_eArgError, "can only repack into a simple dtype, no complex/VALUE"); } // Do the actual repacking -- really simple! NM_MATLAB_DTYPE_TEMPLATE_TABLE(ttable, nm::io::matlab_cstring_to_dtype_string, char*, size_t& result_len, const char* str, size_t bytes); size_t repacked_data_length; char* repacked_data = ttable[to_type][from_type](repacked_data_length, RSTRING_PTR(str), RSTRING_LEN(str)); // Encode as 8-bit ASCII with a length -- don't want to hiccup on \0 VALUE result = rb_str_new(repacked_data, repacked_data_length); NM_FREE(repacked_data); // Don't forget to free what we allocated! return result; } /* * Take two byte-strings (real and imaginary) and treat them as if they contain * a sequence of data of type dtype. Merge them together and return a new string. */ static VALUE nm_rbstring_merge(VALUE self, VALUE rb_real, VALUE rb_imaginary, VALUE rb_dtype) { // Sanity check. if (RSTRING_LEN(rb_real) != RSTRING_LEN(rb_imaginary)) { rb_raise(rb_eArgError, "real and imaginary components do not have same length"); } nm::dtype_t dtype = nm_dtype_from_rbsymbol(rb_dtype); size_t len = DTYPE_SIZES[dtype]; char *real = RSTRING_PTR(rb_real), *imag = RSTRING_PTR(rb_imaginary); char* merge = NM_ALLOCA_N(char, RSTRING_LEN(rb_real)*2); size_t merge_pos = 0; // Merge the two sequences for (size_t i = 0; i < (size_t)RSTRING_LEN(rb_real); i += len) { // Copy real number memcpy(merge + merge_pos, real + i, len); merge_pos += len; // Copy imaginary number memcpy(merge + merge_pos, imag + i, len); merge_pos += len; } return rb_str_new(merge, merge_pos); } void nm_init_io() { cNMatrix_IO = rb_define_module_under(cNMatrix, "IO"); cNMatrix_IO_Matlab = rb_define_module_under(cNMatrix_IO, "Matlab"); rb_define_singleton_method(cNMatrix_IO_Matlab, "repack", (METHOD)nm_rbstring_matlab_repack, 3); rb_define_singleton_method(cNMatrix_IO_Matlab, "complex_merge", (METHOD)nm_rbstring_merge, 3); } } 
================================================ FILE: ext/nmatrix/util/io.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == io.h // // Header file for input/output support functions. #ifndef NMATRIX_IO_H #define NMATRIX_IO_H /* * Project Includes */ #include "nmatrix.h" #include "data/data.h" #include "storage/storage.h" /* * Extern Types */ extern const char* const DTYPE_NAMES[nm::NUM_DTYPES]; namespace nm { namespace io { /* * Types */ enum matlab_dtype_t { miINT8 = 1, miUINT8 = 2, miINT16 = 3, miUINT16 = 4, miINT32 = 5, miUINT32 = 6, miSINGLE = 7, miDOUBLE = 9, miINT64 = 12, miUINT64 = 13, miMATRIX = 14 }; /* * Constants */ const size_t NUM_MATLAB_DTYPES = 15; }} // end of namespace nm::io extern "C" { /* * C accessors. */ nm::dtype_t nm_dtype_from_rbsymbol(VALUE sym); nm::dtype_t nm_dtype_from_rbstring(VALUE str); nm::stype_t nm_stype_from_rbsymbol(VALUE sym); nm::stype_t nm_stype_from_rbstring(VALUE str); void nm_init_io(void); /* * Macros. */ /* * Macro for a function pointer table between NMatrix dtypes and MATLAB dtypes. * * You can't convert as freely between these two as you can between NMatrix dtypes, but there's no reason to. 
MATLAB * stores its complex numbers in two separate arrays, for example, not as a single unit of data. If you want to convert * to a VALUE, convert first to an appropriate integer or float type. * * FIXME: Maybe be a little more selective about which conversions we DO allow. This is really just for loading an * already-constructed MATLAB matrix into memory, and most of these functions will never get called. */ #define NM_MATLAB_DTYPE_TEMPLATE_TABLE(name,fun,ret,...) \ static ret (*(name)[7][nm::io::NUM_MATLAB_DTYPES])(__VA_ARGS__) = { \ { NULL, fun, fun, fun, fun, fun, fun, fun, NULL, fun, NULL, NULL, fun, fun, NULL }, \ { NULL, fun, fun, fun, fun, fun, fun, fun, NULL, fun, NULL, NULL, fun, fun, NULL }, \ { NULL, fun, fun, fun, fun, fun, fun, fun, NULL, fun, NULL, NULL, fun, fun, NULL }, \ { NULL, fun, fun, fun, fun, fun, fun, fun, NULL, fun, NULL, NULL, fun, fun, NULL }, \ { NULL, fun, fun, fun, fun, fun, fun, fun, NULL, fun, NULL, NULL, fun, fun, NULL }, \ { NULL, fun, fun, fun, fun, fun, fun, fun, NULL, fun, NULL, NULL, fun, fun, NULL }, \ { NULL, fun, fun, fun, fun, fun, fun, fun, NULL, fun, NULL, NULL, fun, fun, NULL } \ }; /* * Hash#has_key? for symbols. Arguments are: hash (VALUE), string (char*). */ #define RB_HASH_HAS_SYMBOL_KEY(hash, str) (rb_funcall((hash), rb_intern("has_key?"), 1, ID2SYM(rb_intern(str))) == Qtrue) } #endif ================================================ FILE: ext/nmatrix/util/sl_list.cpp ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. 
// // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == sl_list.cpp // // Singly-linked list implementation /* * Standard Includes */ #include /* * Project Includes */ #include "types.h" #include "data/data.h" #include "sl_list.h" #include "storage/list/list.h" namespace nm { namespace list { /* * Macros */ #ifndef RHASH_SET_IFNONE #define RHASH_SET_IFNONE(h, v) (RHASH(h)->ifnone = (v)) #endif /* * Global Variables */ /* * Forward Declarations */ /* * Functions */ //////////////// // Lifecycle // /////////////// /* * Creates an empty linked list. */ LIST* create(void) { LIST* list = NM_ALLOC( LIST ); list->first = NULL; return list; } /* * Deletes the linked list and all of its contents. If you want to delete a * list inside of a list, set recursions to 1. For lists inside of lists inside * of the list, set it to 2; and so on. Setting it to 0 is for no recursions. */ void del(LIST* list, size_t recursions) { NODE* next; NODE* curr = list->first; while (curr != NULL) { next = curr->next; if (recursions == 0) { //fprintf(stderr, " free_val: %p\n", curr->val); nm_list_storage_completely_unregister_node(curr); NM_FREE(curr->val); } else { //fprintf(stderr, " free_list: %p\n", list); del((LIST*)curr->val, recursions - 1); } NM_FREE(curr); curr = next; } //fprintf(stderr, " free_list: %p\n", list); NM_FREE(list); } /* * Documentation goes here. 
*/ void mark(LIST* list, size_t recursions) { NODE* next; NODE* curr = list->first; while (curr != NULL) { next = curr->next; if (recursions == 0) { rb_gc_mark(*((VALUE*)(curr->val))); } else { mark((LIST*)curr->val, recursions - 1); } curr = next; } } /////////////// // Accessors // /////////////// /* * Given a list, insert key/val as the first entry in the list. Does not do any * checks, just inserts. */ NODE* insert_first_node(LIST* list, size_t key, void* val, size_t val_size) { NODE* ins = NM_ALLOC(NODE); ins->next = list->first; void* val_copy = NM_ALLOC_N(char, val_size); memcpy(val_copy, val, val_size); ins->val = reinterpret_cast(val_copy); ins->key = key; list->first = ins; return ins; } NODE* insert_first_list(LIST* list, size_t key, LIST* l) { NODE* ins = NM_ALLOC(NODE); ins->next = list->first; ins->val = reinterpret_cast(l); ins->key = key; list->first = ins; return ins; } /* * Given a list and a key/value-ptr pair, create a node (and return that node). * If NULL is returned, it means insertion failed. * If the key already exists in the list, replace tells it to delete the old * value and put in your new one. !replace means delete the new value. */ NODE* insert(LIST* list, bool replace, size_t key, void* val) { NODE *ins; if (list->first == NULL) { // List is empty //if (!(ins = malloc(sizeof(NODE)))) return NULL; ins = NM_ALLOC(NODE); ins->next = NULL; ins->val = val; ins->key = key; list->first = ins; return ins; } else if (key < list->first->key) { // Goes at the beginning of the list //if (!(ins = malloc(sizeof(NODE)))) return NULL; ins = NM_ALLOC(NODE); ins->next = list->first; ins->val = val; ins->key = key; list->first = ins; return ins; } // Goes somewhere else in the list. 
ins = find_nearest_from(list->first, key); if (ins->key == key) { // key already exists if (replace) { nm_list_storage_completely_unregister_node(ins); NM_FREE(ins->val); ins->val = val; } else { NM_FREE(val); } return ins; } else { return insert_after(ins, key, val); } } /* * Documentation goes here. */ NODE* insert_after(NODE* node, size_t key, void* val) { //if (!(ins = malloc(sizeof(NODE)))) return NULL; NODE* ins = NM_ALLOC(NODE); // insert 'ins' between 'node' and 'node->next' ins->next = node->next; node->next = ins; // initialize our new node ins->key = key; ins->val = val; return ins; } /* * Insert a new node immediately after +node+, or replace the existing one if its key is a match. */ NODE* replace_insert_after(NODE* node, size_t key, void* val, bool copy, size_t copy_size) { if (node->next && node->next->key == key) { // Should we copy into the current one or free and insert? if (copy) memcpy(node->next->val, val, copy_size); else { NM_FREE(node->next->val); node->next->val = val; } return node->next; } else { // no next node, or if there is one, it's greater than the current key if (copy) { void* val_copy = NM_ALLOC_N(char, copy_size); memcpy(val_copy, val, copy_size); return insert_after(node, key, val_copy); } else { return insert_after(node, key, val); } } } /* * Functions analogously to list::insert but this inserts a copy of the value instead of the original. */ NODE* insert_copy(LIST *list, bool replace, size_t key, void *val, size_t size) { void *copy_val = NM_ALLOC_N(char, size); memcpy(copy_val, val, size); return insert(list, replace, key, copy_val); } /* * Returns the value pointer for some key. Doesn't free the memory for that value. Doesn't require a find operation, * assumes finding has already been done. If rm is the first item in the list, prev should be NULL. 
*/ void* remove_by_node(LIST* list, NODE* prev, NODE* rm) { if (!prev) list->first = rm->next; else prev->next = rm->next; void* val = rm->val; NM_FREE(rm); return val; } /* * Returns the value pointer (not the node) for some key. Note that it doesn't * free the memory for the value stored in the node -- that pointer gets * returned! Only the node is destroyed. */ void* remove_by_key(LIST* list, size_t key) { NODE *f, *rm; void* val; if (!list->first || list->first->key > key) { // empty list or def. not present return NULL; } if (list->first->key == key) { val = list->first->val; rm = list->first; list->first = rm->next; NM_FREE(rm); return val; } f = find_preceding_from_node(list->first, key); if (!f || !f->next) { // not found, end of list return NULL; } if (f->next->key == key) { // remove the node rm = f->next; f->next = rm->next; // get the value and free the memory for the node val = rm->val; NM_FREE(rm); return val; } return NULL; // not found, middle of list } bool node_is_within_slice(NODE* n, size_t coord, size_t len) { if (!n) return false; if (n->key >= coord && n->key < coord + len) return true; else return false; } /* * Recursive removal of lists that may contain sub-lists. Stores the value ultimately removed in rm. */ bool remove_recursive(LIST* list, const size_t* coords, const size_t* offsets, const size_t* lengths, size_t r, const size_t& dim) { // std::cerr << "remove_recursive: " << r << std::endl; // find the current coordinates in the list NODE* prev = find_preceding_from_list(list, coords[r] + offsets[r]); NODE* n; if (prev) n = prev->next && node_is_within_slice(prev->next, coords[r] + offsets[r], lengths[r]) ? prev->next : NULL; else n = node_is_within_slice(list->first, coords[r] + offsets[r], lengths[r]) ? list->first : NULL; if (r < dim-1) { // nodes here are lists while (n) { // from that sub-list, call remove_recursive. 
bool remove_parent = remove_recursive(reinterpret_cast(n->val), coords, offsets, lengths, r+1, dim); if (remove_parent) { // now empty -- so remove the sub-list // std::cerr << r << ": removing parent list at " << n->key << std::endl; NM_FREE(remove_by_node(list, prev, n)); if (prev) n = prev->next && node_is_within_slice(prev->next, coords[r] + offsets[r], lengths[r]) ? prev->next : NULL; else n = node_is_within_slice(list->first, coords[r] + offsets[r], lengths[r]) ? list->first : NULL; } else { // Move forward to next node (list at n still exists) prev = n; n = prev->next && node_is_within_slice(prev->next, coords[r] + offsets[r], lengths[r]) ? prev->next : NULL; } // Iterate to next one. if (prev) n = prev->next && node_is_within_slice(prev->next, coords[r] + offsets[r], lengths[r]) ? prev->next : NULL; else n = node_is_within_slice(list->first, coords[r] + offsets[r], lengths[r]) ? list->first : NULL; } } else { // nodes here are not lists, but actual values while (n) { // std::cerr << r << ": removing node at " << n->key << std::endl; NM_FREE(remove_by_node(list, prev, n)); if (prev) n = prev->next && node_is_within_slice(prev->next, coords[r] + offsets[r], lengths[r]) ? prev->next : NULL; else n = node_is_within_slice(list->first, coords[r] + offsets[r], lengths[r]) ? list->first : NULL; } } if (!list->first) return true; // if current list is now empty, signal its removal return false; } /////////// // Tests // /////////// ///////////// // Utility // ///////////// /* * Find some element in the list and return the node ptr for that key. */ NODE* find(LIST* list, size_t key) { NODE* f; if (!list->first) { // empty list -- does not exist return NULL; } // see if we can find it. f = find_nearest_from(list->first, key); if (!f || f->key == key) { return f; } return NULL; } /* * Find some element in the list and return the node ptr for that key. 
*/ NODE* find_with_preceding(LIST* list, size_t key, NODE*& prev) { if (!prev) prev = list->first; if (!prev) return NULL; // empty list, does not exist if (prev->key == key) { NODE* n = prev; prev = NULL; return n; } while (prev->next && prev->next->key < key) { prev = prev->next; } return prev->next; } /* * Finds the node that should go before whatever key we request, whether or not * that key is present. */ NODE* find_preceding_from_node(NODE* prev, size_t key) { NODE* curr = prev->next; if (!curr || key <= curr->key) { return prev; } else { return find_preceding_from_node(curr, key); } } /* * Returns NULL if the key being sought is first in the list or *should* be first in the list but is absent. Otherwise * returns the previous node to where that key is or should be. */ NODE* find_preceding_from_list(LIST* l, size_t key) { NODE* n = l->first; if (!n || n->key >= key) return NULL; else return find_preceding_from_node(n, key); } /* * Finds the node or, if not present, the node that it should follow. NULL * indicates no preceding node. */ NODE* find_nearest(LIST* list, size_t key) { return find_nearest_from(list->first, key); } /* * Finds a node or the one immediately preceding it if it doesn't exist. */ NODE* find_nearest_from(NODE* prev, size_t key) { NODE* f; if (prev && prev->key == key) { return prev; } f = find_preceding_from_node(prev, key); if (!f->next) { // key exceeds final node; return final node. return f; } else if (key == f->next->key) { // node already present; return location return f->next; } else { return f; } } ///////////////////////// // Copying and Casting // ///////////////////////// /* * Copy the contents of a list. 
*/ template void cast_copy_contents(LIST* lhs, const LIST* rhs, size_t recursions) { NODE *lcurr, *rcurr; if (rhs->first) { // copy head node rcurr = rhs->first; lcurr = lhs->first = NM_ALLOC( NODE ); while (rcurr) { lcurr->key = rcurr->key; if (recursions == 0) { // contents is some kind of value lcurr->val = NM_ALLOC( LDType ); *reinterpret_cast(lcurr->val) = *reinterpret_cast( rcurr->val ); } else { // contents is a list lcurr->val = NM_ALLOC( LIST ); cast_copy_contents( reinterpret_cast(lcurr->val), reinterpret_cast(rcurr->val), recursions-1 ); } if (rcurr->next) { lcurr->next = NM_ALLOC( NODE ); } else { lcurr->next = NULL; } lcurr = lcurr->next; rcurr = rcurr->next; } } else { lhs->first = NULL; } } }} // end of namespace nm::list extern "C" { /* * C access for copying the contents of a list. */ void nm_list_cast_copy_contents(LIST* lhs, const LIST* rhs, nm::dtype_t lhs_dtype, nm::dtype_t rhs_dtype, size_t recursions) { LR_DTYPE_TEMPLATE_TABLE(nm::list::cast_copy_contents, void, LIST*, const LIST*, size_t); ttable[lhs_dtype][rhs_dtype](lhs, rhs, recursions); } /* * Sets up a hash with an appropriate default values. That means that if recursions == 0, the default value is default_value, * but if recursions == 1, the default value is going to be a hash with default value of default_value, and if recursions == 2, * the default value is going to be a hash with default value of hash with default value of default_value, and so on. * In other words, it's recursive. */ static VALUE empty_list_to_hash(const nm::dtype_t dtype, size_t recursions, VALUE default_value) { VALUE h = rb_hash_new(); if (recursions) { RHASH_SET_IFNONE(h, empty_list_to_hash(dtype, recursions-1, default_value)); } else { RHASH_SET_IFNONE(h, default_value); } return h; } /* * Copy a list to a Ruby Hash */ VALUE nm_list_copy_to_hash(const LIST* l, const nm::dtype_t dtype, size_t recursions, VALUE default_value) { // Create a hash with default values appropriately specified for a sparse matrix. 
VALUE h = empty_list_to_hash(dtype, recursions, default_value); if (l->first) { NODE* curr = l->first; while (curr) { size_t key = curr->key; if (recursions == 0) { // content is some kind of value rb_hash_aset(h, INT2FIX(key), nm::rubyobj_from_cval(curr->val, dtype).rval); } else { // content is a list rb_hash_aset(h, INT2FIX(key), nm_list_copy_to_hash(reinterpret_cast(curr->val), dtype, recursions-1, default_value)); } curr = curr->next; } } return h; } } // end of extern "C" block ================================================ FILE: ext/nmatrix/util/sl_list.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == sl_list.h // // Singly-linked list implementation used for List Storage. 
#ifndef SL_LIST_H #define SL_LIST_H /* * Standard Includes */ #include #include #include /* * Project Includes */ #include "types.h" #include "data/data.h" #include "nmatrix.h" namespace nm { namespace list { /* * Macros */ /* * Types */ /* * Data */ /* * Functions */ //////////////// // Lifecycle // /////////////// LIST* create(void); void del(LIST* list, size_t recursions); void mark(LIST* list, size_t recursions); /////////////// // Accessors // /////////////// NODE* insert(LIST* list, bool replace, size_t key, void* val); NODE* insert_copy(LIST *list, bool replace, size_t key, void *val, size_t size); NODE* insert_first_node(LIST* list, size_t key, void* val, size_t val_size); NODE* insert_first_list(LIST* list, size_t key, LIST* l); NODE* insert_after(NODE* node, size_t key, void* val); NODE* replace_insert_after(NODE* node, size_t key, void* val, bool copy, size_t copy_size); void* remove(LIST* list, size_t key); void* remove_by_node(LIST* list, NODE* prev, NODE* rm); bool remove_recursive(LIST* list, const size_t* coords, const size_t* offset, const size_t* lengths, size_t r, const size_t& dim); bool node_is_within_slice(NODE* n, size_t coord, size_t len); template inline NODE* insert_helper(LIST* list, NODE* node, size_t key, Type val) { Type* val_mem = NM_ALLOC(Type); *val_mem = val; if (node == NULL) { return insert(list, false, key, val_mem); } else { return insert_after(node, key, val_mem); } } template inline NODE* insert_helper(LIST* list, NODE* node, size_t key, Type* ptr) { if (node == NULL) { return insert(list, false, key, ptr); } else { return insert_after(node, key, ptr); } } /////////// // Tests // /////////// ///////////// // Utility // ///////////// NODE* find(LIST* list, size_t key); NODE* find_preceding_from_node(NODE* prev, size_t key); NODE* find_preceding_from_list(LIST* l, size_t key); NODE* find_nearest(LIST* list, size_t key); NODE* find_nearest_from(NODE* prev, size_t key); ///////////////////////// // Copying and Casting // 
///////////////////////// template void cast_copy_contents(LIST* lhs, const LIST* rhs, size_t recursions); }} // end of namespace nm::list extern "C" { void nm_list_cast_copy_contents(LIST* lhs, const LIST* rhs, nm::dtype_t lhs_dtype, nm::dtype_t rhs_dtype, size_t recursions); VALUE nm_list_copy_to_hash(const LIST* l, const nm::dtype_t dtype, size_t recursions, VALUE default_value); } // end of extern "C" block #endif // SL_LIST_H ================================================ FILE: ext/nmatrix/util/util.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == util.h // // Header file for utility functions and data. #ifndef UTIL_H #define UTIL_H /* * Standard Includes */ /* * Project Includes */ #include "types.h" /* * Macros */ /* * Types */ /* * Data */ /* * Functions */ namespace nm { template inline Type gcf(Type x, Type y) { Type t; if (x < 0) x = -x; if (y < 0) y = -y; if (x == 0) return y; if (y == 0) return x; while (x > 0) { t = x; x = y % x; y = t; } return y; } } // end of namespace nm #endif // UTIL_H ================================================ FILE: ext/nmatrix_atlas/extconf.rb ================================================ # = NMatrix # # A linear algebra library for scientific computation in Ruby. 
# NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == extconf.rb # # This file checks for ATLAS and other necessary headers, and # generates a Makefile for compiling NMatrix. require "nmatrix/mkmf" #$INSTALLFILES = [['nmatrix.h', '$(archdir)'], ['nmatrix.hpp', '$(archdir)'], ['nmatrix_config.h', '$(archdir)'], ['nm_memory.h', '$(archdir)']] if /cygwin|mingw/ =~ RUBY_PLATFORM #$INSTALLFILES << ['libnmatrix.a', '$(archdir)'] end $DEBUG = true #not the right way to add this include directory $CFLAGS = ["-Wall -Werror=return-type -I$(srcdir)/../nmatrix",$CFLAGS].join(" ") $CXXFLAGS = ["-Wall -Werror=return-type -I$(srcdir)/../nmatrix",$CXXFLAGS].join(" ") $CPPFLAGS = ["-Wall -Werror=return-type -I$(srcdir)/../nmatrix",$CPPFLAGS].join(" ") # When adding objects here, make sure their directories are included in CLEANOBJS down at the bottom of extconf.rb. # Why not just autogenerate this list from all .c/.cpp files in directory? basenames = %w{nmatrix_atlas math_atlas} $objs = basenames.map { |b| "#{b}.o" } $srcs = basenames.map { |b| "#{b}.cpp" } # The next line allows the user to supply --with-atlas-dir=/usr/local/atlas, # --with-atlas-lib or --with-atlas-include and tell the compiler where to look # for ATLAS. The same for all the others # #dir_config("clapack", ["/usr/local/atlas/include"], []) # # # Is g++ having trouble finding your header files? 
# Try this: # export C_INCLUDE_PATH=/usr/local/atlas/include # export CPLUS_INCLUDE_PATH=/usr/local/atlas/include # (substituting in the path of your cblas.h and clapack.h for the path I used). -- JW 8/27/12 idefaults = {lapack: ["/usr/include/atlas"], cblas: ["/usr/local/atlas/include", "/usr/include/atlas"], atlas: ["/usr/local/atlas/include", "/usr/include/atlas"]} # For some reason, if we try to look for /usr/lib64/atlas on a Mac OS X Mavericks system, and the directory does not # exist, it will give a linker error -- even if the lib dir is already correctly included with -L. So we need to check # that Dir.exists?(d) for each. ldefaults = {lapack: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) }, cblas: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) }, atlas: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) }} if have_library("clapack") # Usually only applies for Mac OS X $libs += " -lclapack " end unless have_library("lapack") dir_config("lapack", idefaults[:lapack], ldefaults[:lapack]) end unless have_library("cblas") dir_config("cblas", idefaults[:cblas], ldefaults[:cblas]) end unless have_library("atlas") dir_config("atlas", idefaults[:atlas], ldefaults[:atlas]) end # If BLAS and LAPACK headers are in an atlas directory, prefer those. Otherwise, # we try our luck with the default location. if have_header("atlas/cblas.h") have_header("atlas/clapack.h") else have_header("cblas.h") have_header("clapack.h") end # Although have_func is supposed to take a list as its second argument, I find that it simply # applies a :to_s to the second arg and doesn't actually check each one. We may want to put # have_func calls inside an :each block which checks atlas/clapack.h, cblas.h, clapack.h, and # lastly lapack.h. On Ubuntu, it only works if I use atlas/clapack.h. 
--@mohawkjohn 8/20/14 have_func("clapack_dgetrf", "atlas/clapack.h") have_func("clapack_dgetri", "atlas/clapack.h") have_func("dgesvd_", "clapack.h") # This may not do anything. dgesvd_ seems to be in LAPACK, not CLAPACK. have_func("cblas_dgemm", "cblas.h") #have_func("rb_scan_args", "ruby.h") #find_library("lapack", "clapack_dgetrf") #find_library("cblas", "cblas_dgemm") #find_library("atlas", "ATL_dgemmNN") # Order matters here: ATLAS has to go after LAPACK: http://mail.scipy.org/pipermail/scipy-user/2007-January/010717.html $libs += " -llapack -lcblas -latlas " #$libs += " -lprofiler " create_conf_h("nmatrix_atlas_config.h") create_makefile("nmatrix_atlas") # to clean up object files in subdirectories: open('Makefile', 'a') do |f| clean_objs_paths = %w{ }.map { |d| "#{d}/*.#{CONFIG["OBJEXT"]}" } f.write("CLEANOBJS := $(CLEANOBJS) #{clean_objs_paths.join(' ')}") end ================================================ FILE: ext/nmatrix_atlas/math_atlas/cblas_templates_atlas.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == cblas_templaces_atlas.h // // Define template functions for calling CBLAS functions in the // nm::math::atlas namespace. 
// #ifndef CBLAS_TEMPLATES_ATLAS_H #define CBLAS_TEMPLATES_ATLAS_H //includes so we have access to internal implementations #include "math/rotg.h" #include "math/rot.h" #include "math/asum.h" #include "math/nrm2.h" #include "math/imax.h" #include "math/scal.h" #include "math/gemv.h" #include "math/gemm.h" #include "math/trsm.h" namespace nm { namespace math { namespace atlas { //Add cblas templates in the correct namespace #include "math/cblas_templates_core.h" //Add complex specializations for rot and rotg. These cblas functions are not //part of the the standard CBLAS and so need to be in an nmatrix-atlas header. template <> inline void rotg(Complex64* a, Complex64* b, Complex64* c, Complex64* s) { cblas_crotg(a, b, c, s); } template <> inline void rotg(Complex128* a, Complex128* b, Complex128* c, Complex128* s) { cblas_zrotg(a, b, c, s); } template <> inline void rot(const int N, Complex64* X, const int incX, Complex64* Y, const int incY, const float c, const float s) { cblas_csrot(N, X, incX, Y, incY, c, s); } template <> inline void rot(const int N, Complex128* X, const int incX, Complex128* Y, const int incY, const double c, const double s) { cblas_zdrot(N, X, incX, Y, incY, c, s); } }}} //nm::math::atlas #endif ================================================ FILE: ext/nmatrix_atlas/math_atlas/clapack_templates.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. 
// // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == clapack_templates.h // // Collection of functions used to call ATLAS CLAPACK functions // directly. // #ifndef CLAPACK_TEMPLATES_H #define CLAPACK_TEMPLATES_H //needed to get access to internal implementations #include "math/getrf.h" #include "math/getrs.h" namespace nm { namespace math { namespace atlas { //The first group of functions are those for which we have internal implementations. //The internal implementations are defined in the ext/nmatrix/math directory //and are the non-specialized //forms of the template functions nm::math::whatever(). //They are are called below for non-BLAS //types in the non-specialized form of the template nm::math::atlas::whatever(). //The specialized forms call the appropriate clapack functions. //We also define the clapack_whatever() template //functions below, which just cast //their arguments to the appropriate types. 
//getrf template inline int getrf(const enum CBLAS_ORDER order, const int m, const int n, DType* a, const int lda, int* ipiv) { return nm::math::getrf(order, m, n, a, lda, ipiv); } //Apparently CLAPACK isn't available on OS X, so we only define these //specializations if available, #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H) template <> inline int getrf(const enum CBLAS_ORDER order, const int m, const int n, float* a, const int lda, int* ipiv) { return clapack_sgetrf(order, m, n, a, lda, ipiv); } template <> inline int getrf(const enum CBLAS_ORDER order, const int m, const int n, double* a, const int lda, int* ipiv) { return clapack_dgetrf(order, m, n, a, lda, ipiv); } template <> inline int getrf(const enum CBLAS_ORDER order, const int m, const int n, Complex64* a, const int lda, int* ipiv) { return clapack_cgetrf(order, m, n, a, lda, ipiv); } template <> inline int getrf(const enum CBLAS_ORDER order, const int m, const int n, Complex128* a, const int lda, int* ipiv) { return clapack_zgetrf(order, m, n, a, lda, ipiv); } #endif template inline int clapack_getrf(const enum CBLAS_ORDER order, const int m, const int n, void* a, const int lda, int* ipiv) { return getrf(order, m, n, static_cast(a), lda, ipiv); } //getrs /* * Solves a system of linear equations A*X = B with a general NxN matrix A using the LU factorization computed by GETRF. * * From ATLAS 3.8.0. 
*/ template inline int getrs(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans, const int N, const int NRHS, const DType* A, const int lda, const int* ipiv, DType* B, const int ldb) { return nm::math::getrs(Order, Trans, N, NRHS, A, lda, ipiv, B, ldb); } #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H) template <> inline int getrs(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans, const int N, const int NRHS, const float* A, const int lda, const int* ipiv, float* B, const int ldb) { return clapack_sgetrs(Order, Trans, N, NRHS, A, lda, ipiv, B, ldb); } template <> inline int getrs(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans, const int N, const int NRHS, const double* A, const int lda, const int* ipiv, double* B, const int ldb) { return clapack_dgetrs(Order, Trans, N, NRHS, A, lda, ipiv, B, ldb); } template <> inline int getrs(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans, const int N, const int NRHS, const Complex64* A, const int lda, const int* ipiv, Complex64* B, const int ldb) { return clapack_cgetrs(Order, Trans, N, NRHS, A, lda, ipiv, static_cast(B), ldb); } template <> inline int getrs(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans, const int N, const int NRHS, const Complex128* A, const int lda, const int* ipiv, Complex128* B, const int ldb) { return clapack_zgetrs(Order, Trans, N, NRHS, A, lda, ipiv, static_cast(B), ldb); } #endif template inline int clapack_getrs(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE trans, const int n, const int nrhs, const void* a, const int lda, const int* ipiv, void* b, const int ldb) { return getrs(order, trans, n, nrhs, static_cast(a), lda, ipiv, static_cast(b), ldb); } //Functions without internal implementations below: //getri template inline int getri(const enum CBLAS_ORDER order, const int n, DType* a, const int lda, const int* ipiv) { rb_raise(rb_eNotImpError, "getri not yet implemented for non-BLAS dtypes"); return 0; } 
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H) template <> inline int getri(const enum CBLAS_ORDER order, const int n, float* a, const int lda, const int* ipiv) { return clapack_sgetri(order, n, a, lda, ipiv); } template <> inline int getri(const enum CBLAS_ORDER order, const int n, double* a, const int lda, const int* ipiv) { return clapack_dgetri(order, n, a, lda, ipiv); } template <> inline int getri(const enum CBLAS_ORDER order, const int n, Complex64* a, const int lda, const int* ipiv) { return clapack_cgetri(order, n, a, lda, ipiv); } template <> inline int getri(const enum CBLAS_ORDER order, const int n, Complex128* a, const int lda, const int* ipiv) { return clapack_zgetri(order, n, a, lda, ipiv); } #endif template inline int clapack_getri(const enum CBLAS_ORDER order, const int n, void* a, const int lda, const int* ipiv) { return getri(order, n, static_cast(a), lda, ipiv); } //potrf /* * From ATLAS 3.8.0: * * Computes one of two LU factorizations based on the setting of the Order * parameter, as follows: * ---------------------------------------------------------------------------- * Order == CblasColMajor * Column-major factorization of form * A = P * L * U * where P is a row-permutation matrix, L is lower triangular with unit * diagonal elements (lower trapazoidal if M > N), and U is upper triangular * (upper trapazoidal if M < N). * * ---------------------------------------------------------------------------- * Order == CblasRowMajor * Row-major factorization of form * A = P * L * U * where P is a column-permutation matrix, L is lower triangular (lower * trapazoidal if M > N), and U is upper triangular with unit diagonals (upper * trapazoidal if M < N). * * ============================================================================ * Let IERR be the return value of the function: * If IERR == 0, successful exit. 
* If (IERR < 0) the -IERR argument had an illegal value * If (IERR > 0 && Order == CblasColMajor) * U(i-1,i-1) is exactly zero. The factorization has been completed, * but the factor U is exactly singular, and division by zero will * occur if it is used to solve a system of equations. * If (IERR > 0 && Order == CblasRowMajor) * L(i-1,i-1) is exactly zero. The factorization has been completed, * but the factor L is exactly singular, and division by zero will * occur if it is used to solve a system of equations. */ template inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, DType* A, const int lda) { #if defined HAVE_CLAPACK_H || defined HAVE_ATLAS_CLAPACK_H rb_raise(rb_eNotImpError, "not yet implemented for non-BLAS dtypes"); #else rb_raise(rb_eNotImpError, "only CLAPACK version implemented thus far"); #endif return 0; } #if defined HAVE_CLAPACK_H || defined HAVE_ATLAS_CLAPACK_H template <> inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, float* A, const int lda) { return clapack_spotrf(order, uplo, N, A, lda); } template <> inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, double* A, const int lda) { return clapack_dpotrf(order, uplo, N, A, lda); } template <> inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, Complex64* A, const int lda) { return clapack_cpotrf(order, uplo, N, A, lda); } template <> inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, Complex128* A, const int lda) { return clapack_zpotrf(order, uplo, N, A, lda); } #endif template inline int clapack_potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, void* a, const int lda) { return potrf(order, uplo, n, static_cast(a), lda); } //potri template inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, DType* a, const int lda) { rb_raise(rb_eNotImpError, "potri 
not yet implemented for non-BLAS dtypes"); return 0; } #if defined HAVE_CLAPACK_H || defined HAVE_ATLAS_CLAPACK_H template <> inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, float* a, const int lda) { return clapack_spotri(order, uplo, n, a, lda); } template <> inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, double* a, const int lda) { return clapack_dpotri(order, uplo, n, a, lda); } template <> inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, Complex64* a, const int lda) { return clapack_cpotri(order, uplo, n, a, lda); } template <> inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, Complex128* a, const int lda) { return clapack_zpotri(order, uplo, n, a, lda); } #endif template inline int clapack_potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, void* a, const int lda) { return potri(order, uplo, n, static_cast(a), lda); } //potrs /* * Solves a system of linear equations A*X = B with a symmetric positive definite matrix A using the Cholesky factorization computed by POTRF. 
*/ template inline int potrs(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const int NRHS, const DType* A, const int lda, DType* B, const int ldb) { #if defined HAVE_CLAPACK_H || defined HAVE_ATLAS_CLAPACK_H rb_raise(rb_eNotImpError, "not yet implemented for non-BLAS dtypes"); #else rb_raise(rb_eNotImpError, "only CLAPACK version implemented thus far"); #endif } #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H) template <> inline int potrs (const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const int NRHS, const float* A, const int lda, float* B, const int ldb) { return clapack_spotrs(Order, Uplo, N, NRHS, A, lda, B, ldb); } template <> inline int potrs(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const int NRHS, const double* A, const int lda, double* B, const int ldb) { return clapack_dpotrs(Order, Uplo, N, NRHS, A, lda, B, ldb); } template <> inline int potrs(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const int NRHS, const Complex64* A, const int lda, Complex64* B, const int ldb) { return clapack_cpotrs(Order, Uplo, N, NRHS, A, lda, static_cast(B), ldb); } template <> inline int potrs(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const int NRHS, const Complex128* A, const int lda, Complex128* B, const int ldb) { return clapack_zpotrs(Order, Uplo, N, NRHS, A, lda, static_cast(B), ldb); } #endif template inline int clapack_potrs(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, const int nrhs, const void* a, const int lda, void* b, const int ldb) { return potrs(order, uplo, n, nrhs, static_cast(a), lda, static_cast(b), ldb); } }}} #endif ================================================ FILE: ext/nmatrix_atlas/math_atlas/geev.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific 
computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == geev.h // // Header file for interface with LAPACK's xGEEV functions. // #ifndef GEEV_H # define GEEV_H extern "C" { void sgeev_(char* jobvl, char* jobvr, int* n, float* a, int* lda, float* wr, float* wi, float* vl, int* ldvl, float* vr, int* ldvr, float* work, int* lwork, int* info); void dgeev_(char* jobvl, char* jobvr, int* n, double* a, int* lda, double* wr, double* wi, double* vl, int* ldvl, double* vr, int* ldvr, double* work, int* lwork, int* info); void cgeev_(char* jobvl, char* jobvr, int* n, nm::Complex64* a, int* lda, nm::Complex64* w, nm::Complex64* vl, int* ldvl, nm::Complex64* vr, int* ldvr, nm::Complex64* work, int* lwork, float* rwork, int* info); void zgeev_(char* jobvl, char* jobvr, int* n, nm::Complex128* a, int* lda, nm::Complex128* w, nm::Complex128* vl, int* ldvl, nm::Complex128* vr, int* ldvr, nm::Complex128* work, int* lwork, double* rwork, int* info); } namespace nm { namespace math { namespace atlas { template // wr inline int geev(char jobvl, char jobvr, int n, DType* a, int lda, DType* w, DType* wi, DType* vl, int ldvl, DType* vr, int ldvr, DType* work, int lwork, CType* rwork) { rb_raise(rb_eNotImpError, "not yet implemented for non-BLAS dtypes"); return -1; } template <> inline int geev(char jobvl, char jobvr, int n, float* a, int lda, float* w, float* wi, float* vl, int ldvl, float* vr, int ldvr, float* work, int lwork, float* 
rwork) { int info; sgeev_(&jobvl, &jobvr, &n, a, &lda, w, wi, vl, &ldvl, vr, &ldvr, work, &lwork, &info); return info; } template <> inline int geev(char jobvl, char jobvr, int n, double* a, int lda, double* w, double* wi, double* vl, int ldvl, double* vr, int ldvr, double* work, int lwork, double* rwork) { int info; dgeev_(&jobvl, &jobvr, &n, a, &lda, w, wi, vl, &ldvl, vr, &ldvr, work, &lwork, &info); return info; } template <> inline int geev(char jobvl, char jobvr, int n, Complex64* a, int lda, Complex64* w, Complex64* wi, Complex64* vl, int ldvl, Complex64* vr, int ldvr, Complex64* work, int lwork, float* rwork) { int info; cgeev_(&jobvl, &jobvr, &n, a, &lda, w, vl, &ldvl, vr, &ldvr, work, &lwork, rwork, &info); return info; } template <> inline int geev(char jobvl, char jobvr, int n, Complex128* a, int lda, Complex128* w, Complex128* wi, Complex128* vl, int ldvl, Complex128* vr, int ldvr, Complex128* work, int lwork, double* rwork) { int info; zgeev_(&jobvl, &jobvr, &n, a, &lda, w, vl, &ldvl, vr, &ldvr, work, &lwork, rwork, &info); return info; } template inline int lapack_geev(char jobvl, char jobvr, int n, void* a, int lda, void* w, void* wi, void* vl, int ldvl, void* vr, int ldvr, void* work, int lwork, void* rwork) { return geev(jobvl, jobvr, n, reinterpret_cast(a), lda, reinterpret_cast(w), reinterpret_cast(wi), reinterpret_cast(vl), ldvl, reinterpret_cast(vr), ldvr, reinterpret_cast(work), lwork, reinterpret_cast(rwork)); } }}} // end nm::math::atlas #endif // GEEV_H ================================================ FILE: ext/nmatrix_atlas/math_atlas/gesdd.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. 
// // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == gesdd.h // // Header file for interface with LAPACK's xGESDD functions. // #ifndef GESDD_H # define GESDD_H extern "C" { void sgesdd_(char*, int*, int*, float*, int*, float*, float*, int*, float*, int*, float*, int*, int*, int*); void dgesdd_(char*, int*, int*, double*, int*, double*, double*, int*, double*, int*, double*, int*, int*, int*); //the argument s is an array of real values and is returned as array of float/double void cgesdd_(char*, int*, int*, nm::Complex64*, int*, float* s, nm::Complex64*, int*, nm::Complex64*, int*, nm::Complex64*, int*, float*, int*, int*); void zgesdd_(char*, int*, int*, nm::Complex128*, int*, double* s, nm::Complex128*, int*, nm::Complex128*, int*, nm::Complex128*, int*, double*, int*, int*); } namespace nm { namespace math { namespace atlas { template inline int gesdd(char jobz, int m, int n, DType* a, int lda, CType* s, DType* u, int ldu, DType* vt, int ldvt, DType* work, int lwork, int* iwork, CType* rwork) { rb_raise(rb_eNotImpError, "not yet implemented for non-BLAS dtypes"); return -1; } template <> inline int gesdd(char jobz, int m, int n, float* a, int lda, float* s, float* u, int ldu, float* vt, int ldvt, float* work, int lwork, int* iwork, float* rwork) { int info; sgesdd_(&jobz, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, iwork, &info); return info; } template <> inline int gesdd(char jobz, int m, int n, double* a, int lda, double* s, double* u, int ldu, 
double* vt, int ldvt, double* work, int lwork, int* iwork, double* rwork) { int info; dgesdd_(&jobz, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, iwork, &info); return info; } template <> inline int gesdd(char jobz, int m, int n, nm::Complex64* a, int lda, float* s, nm::Complex64* u, int ldu, nm::Complex64* vt, int ldvt, nm::Complex64* work, int lwork, int* iwork, float* rwork) { int info; cgesdd_(&jobz, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, rwork, iwork, &info); return info; } template <> inline int gesdd(char jobz, int m, int n, nm::Complex128* a, int lda, double* s, nm::Complex128* u, int ldu, nm::Complex128* vt, int ldvt, nm::Complex128* work, int lwork, int* iwork, double* rwork) { int info; zgesdd_(&jobz, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, rwork, iwork, &info); return info; } } // end of namespace atlas } // end of namespace math } // end of namespace nm #endif // GESDD_H ================================================ FILE: ext/nmatrix_atlas/math_atlas/gesvd.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == gesvd.h // // Header file for interface with LAPACK's xGESVD functions. 
// #ifndef GESVD_H # define GESVD_H extern "C" { void sgesvd_(char*, char*, int*, int*, float*, int*, float*, float*, int*, float*, int*, float*, int*, int*); void dgesvd_(char*, char*, int*, int*, double*, int*, double*, double*, int*, double*, int*, double*, int*, int*); //the argument s is an array of real values and is returned as array of float/double void cgesvd_(char*, char*, int*, int*, nm::Complex64*, int*, float* s, nm::Complex64*, int*, nm::Complex64*, int*, nm::Complex64*, int*, float*, int*); void zgesvd_(char*, char*, int*, int*, nm::Complex128*, int*, double* s, nm::Complex128*, int*, nm::Complex128*, int*, nm::Complex128*, int*, double*, int*); } namespace nm { namespace math { namespace atlas { template inline int gesvd(char jobu, char jobvt, int m, int n, DType* a, int lda, CType* s, DType* u, int ldu, DType* vt, int ldvt, DType* work, int lwork, CType* rwork) { rb_raise(rb_eNotImpError, "not yet implemented for non-BLAS dtypes"); return -1; } template <> inline int gesvd(char jobu, char jobvt, int m, int n, float* a, int lda, float* s, float* u, int ldu, float* vt, int ldvt, float* work, int lwork, float* rwork) { int info; sgesvd_(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, &info); return info; } template <> inline int gesvd(char jobu, char jobvt, int m, int n, double* a, int lda, double* s, double* u, int ldu, double* vt, int ldvt, double* work, int lwork, double* rwork) { int info; dgesvd_(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, &info); return info; } template <> inline int gesvd(char jobu, char jobvt, int m, int n, nm::Complex64* a, int lda, float* s, nm::Complex64* u, int ldu, nm::Complex64* vt, int ldvt, nm::Complex64* work, int lwork, float* rwork) { int info; cgesvd_(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, rwork, &info); return info; } template <> inline int gesvd(char jobu, char jobvt, int m, int n, nm::Complex128* a, int lda, double* s, nm::Complex128* u, 
int ldu, nm::Complex128* vt, int ldvt, nm::Complex128* work, int lwork, double* rwork) { int info; zgesvd_(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, rwork, &info); return info; } } // end of namespace atlas } // end of namespace math } // end of namespace nm #endif // GESVD_H ================================================ FILE: ext/nmatrix_atlas/math_atlas/inc.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == inc.h // // Includes needed for LAPACK, CLAPACK, and CBLAS functions. // #ifndef INC_H # define INC_H extern "C" { // These need to be in an extern "C" block or you'll get all kinds of undefined symbol errors. #if defined HAVE_CBLAS_H #include #elif defined HAVE_ATLAS_CBLAS_H #include #endif #if defined HAVE_CLAPACK_H #include #elif defined HAVE_ATLAS_CLAPACK_H #include #endif } #endif // INC_H ================================================ FILE: ext/nmatrix_atlas/math_atlas.cpp ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. 
// // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == math_atlas.cpp // // Ruby-exposed CBLAS and LAPACK functions that call ATLAS // functions. // /* * Project Includes */ #include "data/data.h" #include "math_atlas/inc.h" #include "math/util.h" //BLAS #include "math_atlas/cblas_templates_atlas.h" //LAPACK #include "math/laswp.h" #include "math_atlas/clapack_templates.h" #include "math_atlas/gesvd.h" #include "math_atlas/gesdd.h" #include "math_atlas/geev.h" /* * Forward Declarations */ extern "C" { /* BLAS Level 1. */ static VALUE nm_atlas_cblas_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx); static VALUE nm_atlas_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx); static VALUE nm_atlas_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx); static VALUE nm_atlas_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s); static VALUE nm_atlas_cblas_rotg(VALUE self, VALUE ab); static VALUE nm_atlas_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx); /* BLAS Level 2. */ static VALUE nm_atlas_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda, VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy); /* BLAS Level 3. 
*/ static VALUE nm_atlas_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc); static VALUE nm_atlas_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb); static VALUE nm_atlas_cblas_trmm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n, VALUE alpha, VALUE a, VALUE lda, VALUE b, VALUE ldb); static VALUE nm_atlas_cblas_herk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a, VALUE lda, VALUE beta, VALUE c, VALUE ldc); static VALUE nm_atlas_cblas_syrk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a, VALUE lda, VALUE beta, VALUE c, VALUE ldc); /* LAPACK. */ static VALUE nm_atlas_has_clapack(VALUE self); static VALUE nm_atlas_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda); static VALUE nm_atlas_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda); static VALUE nm_atlas_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb); static VALUE nm_atlas_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb); static VALUE nm_atlas_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv); static VALUE nm_atlas_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda); static VALUE nm_atlas_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx); static VALUE nm_atlas_lapack_gesvd(VALUE self, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lworkspace_size); static VALUE 
nm_atlas_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lworkspace_size); static VALUE nm_atlas_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr, VALUE lwork); } //////////////////// // Math Functions // //////////////////// namespace nm { namespace math { namespace atlas { /* * Function signature conversion for calling CBLAS' gesvd functions as directly as possible. */ template inline static int lapack_gesvd(char jobu, char jobvt, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt, void* work, int lwork, void* rwork) { return gesvd(jobu, jobvt, m, n, reinterpret_cast(a), lda, reinterpret_cast(s), reinterpret_cast(u), ldu, reinterpret_cast(vt), ldvt, reinterpret_cast(work), lwork, reinterpret_cast(rwork)); } /* * Function signature conversion for calling CBLAS' gesdd functions as directly as possible. 
*/ template inline static int lapack_gesdd(char jobz, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt, void* work, int lwork, int* iwork, void* rwork) { return gesdd(jobz, m, n, reinterpret_cast(a), lda, reinterpret_cast(s), reinterpret_cast(u), ldu, reinterpret_cast(vt), ldvt, reinterpret_cast(work), lwork, iwork, reinterpret_cast(rwork)); } } } } extern "C" { /////////////////// // Ruby Bindings // /////////////////// void nm_math_init_atlas() { VALUE cNMatrix_ATLAS = rb_define_module_under(cNMatrix, "ATLAS"); rb_define_singleton_method(cNMatrix, "has_clapack?", (METHOD)nm_atlas_has_clapack, 0); VALUE cNMatrix_ATLAS_LAPACK = rb_define_module_under(cNMatrix_ATLAS, "LAPACK"); /* ATLAS-CLAPACK Functions */ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "clapack_getrf", (METHOD)nm_atlas_clapack_getrf, 5); rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "clapack_potrf", (METHOD)nm_atlas_clapack_potrf, 5); rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "clapack_getrs", (METHOD)nm_atlas_clapack_getrs, 9); rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "clapack_potrs", (METHOD)nm_atlas_clapack_potrs, 8); rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "clapack_getri", (METHOD)nm_atlas_clapack_getri, 5); rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "clapack_potri", (METHOD)nm_atlas_clapack_potri, 5); rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "clapack_laswp", (METHOD)nm_atlas_clapack_laswp, 7); /* Non-ATLAS regular LAPACK Functions called via Fortran interface */ rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "lapack_gesvd", (METHOD)nm_atlas_lapack_gesvd, 12); rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "lapack_gesdd", (METHOD)nm_atlas_lapack_gesdd, 11); rb_define_singleton_method(cNMatrix_ATLAS_LAPACK, "lapack_geev", (METHOD)nm_atlas_lapack_geev, 12); VALUE cNMatrix_ATLAS_BLAS = rb_define_module_under(cNMatrix_ATLAS, "BLAS"); //BLAS Level 1 rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_scal", 
(METHOD)nm_atlas_cblas_scal, 4); rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_nrm2", (METHOD)nm_atlas_cblas_nrm2, 3); rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_asum", (METHOD)nm_atlas_cblas_asum, 3); rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_rot", (METHOD)nm_atlas_cblas_rot, 7); rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_rotg", (METHOD)nm_atlas_cblas_rotg, 1); rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_imax", (METHOD)nm_atlas_cblas_imax, 3); //BLAS Level 2 rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_gemv", (METHOD)nm_atlas_cblas_gemv, 11); //BLAS Level 3 rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_gemm", (METHOD)nm_atlas_cblas_gemm, 14); rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_trsm", (METHOD)nm_atlas_cblas_trsm, 12); rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_trmm", (METHOD)nm_atlas_cblas_trmm, 12); rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_syrk", (METHOD)nm_atlas_cblas_syrk, 11); rb_define_singleton_method(cNMatrix_ATLAS_BLAS, "cblas_herk", (METHOD)nm_atlas_cblas_herk, 11); } /* * Simple way to check from within Ruby code if clapack functions are available, without * having to wait around for an exception to be thrown. */ static VALUE nm_atlas_has_clapack(VALUE self) { #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H) return Qtrue; #else return Qfalse; #endif } /* * call-seq: * NMatrix::BLAS.cblas_scal(n, alpha, vector, inc) -> NMatrix * * BLAS level 1 function +scal+. Works with all dtypes. * * Scale +vector+ in-place by +alpha+ and also return it. The operation is as * follows: * x <- alpha * x * * - +n+ -> Number of elements of +vector+. * - +alpha+ -> Scalar value used in the operation. * - +vector+ -> NMatrix of shape [n,1] or [1,n]. Modified in-place. * - +inc+ -> Increment used in the scaling function. Should generally be 1. 
*/
static VALUE nm_atlas_cblas_scal(VALUE self, VALUE n, VALUE alpha, VALUE vector, VALUE incx) {
  nm::dtype_t dtype = NM_DTYPE(vector);

  // Convert the Ruby scalar +alpha+ into a raw C value of the vector's dtype.
  void* scalar = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
  rubyval_to_cval(alpha, dtype, scalar);

  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::atlas::cblas_scal, void, const int n, const void* scalar, void* x, const int incx);

  // Scaling is done in-place on the dense storage; +vector+ is also returned.
  ttable[dtype](FIX2INT(n), scalar, NM_STORAGE_DENSE(vector)->elements, FIX2INT(incx));

  return vector;
}

/*
 * Call any of the cblas_xrotg functions as directly as possible.
 *
 * xROTG computes the elements of a Givens plane rotation matrix such that:
 *
 *  |  c s |   | a |   | r |
 *  | -s c | * | b | = | 0 |
 *
 * where r = +- sqrt( a**2 + b**2 ) and c**2 + s**2 = 1.
 *
 * The Givens plane rotation can be used to introduce zero elements into a matrix selectively.
 *
 * This function differs from most of the other raw BLAS accessors. Instead of
 * providing a, b, c, s as arguments, you should only provide a and b (the
 * inputs), and you should provide them as the first two elements of any dense
 * NMatrix type.
 *
 * The outputs [c,s] will be returned in a Ruby Array at the end; the input
 * NMatrix will also be modified in-place.
 *
 * This function, like the other cblas_ functions, does minimal type-checking.
 */
static VALUE nm_atlas_cblas_rotg(VALUE self, VALUE ab) {
  // Per-dtype dispatch; integer dtypes are intentionally unsupported.
  // NOTE(review): the template argument lists on these table entries appear to
  // have been stripped during extraction; confirm the float32/float64/
  // complex64/complex128 instantiations against the upstream source.
  static void (*ttable[nm::NUM_DTYPES])(void* a, void* b, void* c, void* s) = {
    NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
    nm::math::atlas::cblas_rotg,
    nm::math::atlas::cblas_rotg,
    nm::math::atlas::cblas_rotg,
    nm::math::atlas::cblas_rotg,
    NULL //nm::math::atlas::cblas_rotg
  };

  nm::dtype_t dtype = NM_DTYPE(ab);

  if (!ttable[dtype]) {
    rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
    return Qnil;
  } else {
    // Pin self and ab while we hold raw pointers into ab's storage.
    NM_CONSERVATIVE(nm_register_value(&self));
    NM_CONSERVATIVE(nm_register_value(&ab));

    void *pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
         *pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);

    // extract A and B from the NVector (first two elements)
    void* pA = NM_STORAGE_DENSE(ab)->elements;
    void* pB = (char*)(NM_STORAGE_DENSE(ab)->elements) + DTYPE_SIZES[dtype];
    // c and s are output

    ttable[dtype](pA, pB, pC, pS);

    VALUE result = rb_ary_new2(2);

    // NOTE(review): the RUBYOBJ slot in ttable above is NULL, so this branch
    // looks unreachable from this function; confirm before relying on it.
    if (dtype == nm::RUBYOBJ) {
      rb_ary_store(result, 0, *reinterpret_cast(pC));
      rb_ary_store(result, 1, *reinterpret_cast(pS));
    } else {
      rb_ary_store(result, 0, nm::rubyobj_from_cval(pC, dtype).rval);
      rb_ary_store(result, 1, nm::rubyobj_from_cval(pS, dtype).rval);
    }

    NM_CONSERVATIVE(nm_unregister_value(&ab));
    NM_CONSERVATIVE(nm_unregister_value(&self));
    return result;
  }
}

/*
 * Call any of the cblas_xrot functions as directly as possible.
 *
 * xROT is a BLAS level 1 routine (taking two vectors) which applies a plane rotation.
 *
 * It's tough to find documentation on xROT. Here are what we think the arguments are for:
 *  * n     :: number of elements to consider in x and y
 *  * x     :: a vector (expects an NVector)
 *  * incx  :: stride of x
 *  * y     :: a vector (expects an NVector)
 *  * incy  :: stride of y
 *  * c     :: cosine of the angle of rotation
 *  * s     :: sine of the angle of rotation
 *
 * Note that c and s will be the same dtype as x and y, except when x and y are complex. If x and y are complex, c and s
 * will be float for Complex64 or double for Complex128.
 *
 * You probably don't want to call this function. Instead, why don't you try rot, which is more flexible
 * with its arguments?
 *
 * This function does almost no type checking.
Seriously, be really careful when you call it! There's no exception
 * handling, so you can easily crash Ruby!
 */
static VALUE nm_atlas_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s) {
  // NOTE(review): template argument lists appear stripped by extraction here;
  // confirm instantiations against upstream before editing.
  static void (*ttable[nm::NUM_DTYPES])(const int N, void*, const int, void*, const int, const void*, const void*) = {
    NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
    nm::math::atlas::cblas_rot,
    nm::math::atlas::cblas_rot,
    nm::math::atlas::cblas_rot,
    nm::math::atlas::cblas_rot,
    nm::math::atlas::cblas_rot
  };

  nm::dtype_t dtype = NM_DTYPE(x);

  if (!ttable[dtype]) {
    rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
    return Qfalse;
  } else {
    void *pC, *pS;

    // We need to ensure the cosine and sine arguments are the correct dtype -- which may differ from the actual dtype.
    if (dtype == nm::COMPLEX64) {
      pC = NM_ALLOCA_N(float,1);
      pS = NM_ALLOCA_N(float,1);
      rubyval_to_cval(c, nm::FLOAT32, pC);
      rubyval_to_cval(s, nm::FLOAT32, pS);
    } else if (dtype == nm::COMPLEX128) {
      pC = NM_ALLOCA_N(double,1);
      pS = NM_ALLOCA_N(double,1);
      rubyval_to_cval(c, nm::FLOAT64, pC);
      rubyval_to_cval(s, nm::FLOAT64, pS);
    } else {
      pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
      pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
      rubyval_to_cval(c, dtype, pC);
      rubyval_to_cval(s, dtype, pS);
    }

    // x and y are rotated in-place in their dense storage.
    ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), NM_STORAGE_DENSE(y)->elements, FIX2INT(incy), pC, pS);

    return Qtrue;
  }
}

/*
 * Call any of the cblas_xnrm2 functions as directly as possible.
 *
 * xNRM2 is a BLAS level 1 routine which calculates the 2-norm of an n-vector x.
 *
 * Arguments:
 *  * n     :: length of x, must be at least 0
 *  * x     :: pointer to first entry of input vector
 *  * incx  :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
 *
 * You probably don't want to call this function. Instead, why don't you try nrm2, which is more flexible
 * with its arguments?
 *
 * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
 * handling, so you can easily crash Ruby!
 */
static VALUE nm_atlas_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx) {
  static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
    NULL, NULL, NULL, NULL, NULL, // no help for integers
    nm::math::atlas::cblas_nrm2,
    nm::math::atlas::cblas_nrm2,
    nm::math::atlas::cblas_nrm2,
    nm::math::atlas::cblas_nrm2,
    nm::math::atlas::cblas_nrm2
  };

  nm::dtype_t dtype = NM_DTYPE(x);

  if (!ttable[dtype]) {
    rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors");
    return Qnil;
  } else {
    // Determine the return dtype and allocate it.
    // For complex input the norm is real, so the result dtype is the matching float type.
    nm::dtype_t rdtype = dtype;
    if (dtype == nm::COMPLEX64)       rdtype = nm::FLOAT32;
    else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;

    void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);

    ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);

    return nm::rubyobj_from_cval(Result, rdtype).rval;
  }
}

/*
 * Call any of the cblas_xasum functions as directly as possible.
 *
 * xASUM is a BLAS level 1 routine which calculates the sum of absolute values of the entries
 * of a vector x.
 *
 * Arguments:
 *  * n     :: length of x, must be at least 0
 *  * x     :: pointer to first entry of input vector
 *  * incx  :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
 *
 * You probably don't want to call this function. Instead, why don't you try asum, which is more flexible
 * with its arguments?
 *
 * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
 * handling, so you can easily crash Ruby!
*/
static VALUE nm_atlas_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx) {
  // Unlike nrm2, asum is defined for every dtype, so the table has no NULLs.
  static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
    nm::math::atlas::cblas_asum,
    nm::math::atlas::cblas_asum,
    nm::math::atlas::cblas_asum,
    nm::math::atlas::cblas_asum,
    nm::math::atlas::cblas_asum,
    nm::math::atlas::cblas_asum,
    nm::math::atlas::cblas_asum,
    nm::math::atlas::cblas_asum,
    nm::math::atlas::cblas_asum,
    nm::math::atlas::cblas_asum
  };

  nm::dtype_t dtype = NM_DTYPE(x);

  // Determine the return dtype and allocate it.
  // Complex input yields a real (float) result dtype.
  nm::dtype_t rdtype = dtype;
  if (dtype == nm::COMPLEX64)       rdtype = nm::FLOAT32;
  else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;

  void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);

  ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);

  return nm::rubyobj_from_cval(Result, rdtype).rval;
}

/*
 * call-seq:
 *     NMatrix::BLAS.cblas_imax(n, vector, inc) -> Fixnum
 *
 * BLAS level 1 routine.
 *
 * Return the index of the largest element of +vector+.
 *
 * - +n+ -> Vector's size. Generally, you can use NMatrix#rows or NMatrix#cols.
 * - +vector+ -> A NMatrix of shape [n,1] or [1,n] with any dtype.
 * - +inc+ -> It's the increment used when searching. Use 1 except if you know
 *   what you're doing.
 */
static VALUE nm_atlas_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx) {
  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::atlas::cblas_imax, int, const int n, const void* x, const int incx);

  nm::dtype_t dtype = NM_DTYPE(x);

  int index = ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx));

  // Convert to Ruby's Int value.
  return INT2FIX(index);
}

/* Call any of the cblas_xgemv functions as directly as possible.
 *
 * The cblas_xgemv functions (dgemv, sgemv, cgemv, and zgemv) define the following operation:
 *
 *    y = alpha*op(A)*x + beta*y
 *
 * where op(A) is one of op(A) = A, op(A) = A**T, or the complex conjugate of A.
 *
 * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
 * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
 * expose the ultra-optimized ATLAS versions.
 *
 * == Arguments
 * See: http://www.netlib.org/blas/dgemm.f
 *
 * You probably don't want to call this function. Instead, why don't you try cblas_gemv, which is more flexible
 * with its arguments?
 *
 * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
 * handling, so you can easily crash Ruby!
 */
static VALUE nm_atlas_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE alpha, VALUE a, VALUE lda, VALUE x, VALUE incx, VALUE beta, VALUE y, VALUE incy) {
  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::atlas::cblas_gemv, bool, const enum CBLAS_TRANSPOSE, const int, const int, const void*, const void*, const int, const void*, const int, const void*, void*, const int)

  nm::dtype_t dtype = NM_DTYPE(a);

  // Marshal the Ruby scalars alpha/beta into raw values of the matrix dtype.
  void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
       *pBeta  = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
  rubyval_to_cval(alpha, dtype, pAlpha);
  rubyval_to_cval(beta, dtype, pBeta);

  // y is updated in-place; the return value reports success as a boolean.
  return ttable[dtype](blas_transpose_sym(trans_a), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), pBeta, NM_STORAGE_DENSE(y)->elements, FIX2INT(incy)) ? Qtrue : Qfalse;
}

/* Call any of the cblas_xgemm functions as directly as possible.
 *
 * The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation:
 *
 *    C = alpha*op(A)*op(B) + beta*C
 *
 * where op(X) is one of op(X) = X, op(X) = X**T, or the complex conjugate of X.
 *
 * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
* Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
 * expose the ultra-optimized ATLAS versions.
 *
 * == Arguments
 * See: http://www.netlib.org/blas/dgemm.f
 *
 * You probably don't want to call this function. Instead, why don't you try gemm, which is more flexible
 * with its arguments?
 *
 * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
 * handling, so you can easily crash Ruby!
 */
static VALUE nm_atlas_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE alpha, VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE beta, VALUE c, VALUE ldc) {
  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::atlas::cblas_gemm, void, const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, int m, int n, int k, void* alpha, void* a, int lda, void* b, int ldb, void* beta, void* c, int ldc);

  nm::dtype_t dtype = NM_DTYPE(a);

  // Marshal the Ruby scalars alpha/beta into raw values of the matrix dtype.
  void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
       *pBeta  = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
  rubyval_to_cval(alpha, dtype, pAlpha);
  rubyval_to_cval(beta, dtype, pBeta);

  // C is updated in-place and returned.
  ttable[dtype](blas_order_sym(order), blas_transpose_sym(trans_a), blas_transpose_sym(trans_b), FIX2INT(m), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));

  return c;
}

/*
 * Call any of the cblas_xtrsm functions as directly as possible: solves a
 * triangular system op(A)*X = alpha*B (or X*op(A) = alpha*B, depending on
 * +side+), overwriting B with the solution. B is modified in-place; Qtrue is
 * returned. Integer dtypes are rejected (division is required).
 */
static VALUE nm_atlas_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n, VALUE alpha, VALUE a, VALUE lda, VALUE b, VALUE ldb) {
  // NOTE(review): template argument lists appear stripped by extraction;
  // confirm instantiations against upstream before editing.
  static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_SIDE, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG, const int m, const int n, const void* alpha, const void* a, const int lda, void* b, const int ldb) = {
    NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
    nm::math::atlas::cblas_trsm,
    nm::math::atlas::cblas_trsm,
    cblas_ctrsm, cblas_ztrsm, // call directly, same function signature!
    nm::math::atlas::cblas_trsm
  };

  nm::dtype_t dtype = NM_DTYPE(a);

  if (!ttable[dtype]) {
    rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
  } else {
    void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
    rubyval_to_cval(alpha, dtype, pAlpha);

    ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
  }

  return Qtrue;
}

/*
 * Call any of the cblas_xtrmm functions as directly as possible: computes the
 * triangular matrix product B = alpha*op(A)*B (or B*op(A)). B is modified
 * in-place and returned. Integer dtypes and RUBYOBJ are rejected.
 */
static VALUE nm_atlas_cblas_trmm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n, VALUE alpha, VALUE a, VALUE lda, VALUE b, VALUE ldb) {
  static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_SIDE, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG, const int m, const int n, const void* alpha, const void* a, const int lda, void* b, const int ldb) = {
    NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
    nm::math::atlas::cblas_trmm,
    nm::math::atlas::cblas_trmm,
    cblas_ctrmm, cblas_ztrmm, // call directly, same function signature!
    NULL // RUBYOBJ: not supported here (unlike trsm above)
  };

  nm::dtype_t dtype = NM_DTYPE(a);

  if (!ttable[dtype]) {
    rb_raise(nm_eDataTypeError, "this matrix operation not yet defined for non-BLAS dtypes");
  } else {
    void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
    rubyval_to_cval(alpha, dtype, pAlpha);

    ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
  }

  return b;
}

/*
 * Call any of the cblas_xsyrk functions as directly as possible: symmetric
 * rank-k update C = alpha*A*A**T + beta*C (or alpha*A**T*A + beta*C).
 * C is modified in-place; Qtrue is returned.
 */
static VALUE nm_atlas_cblas_syrk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a, VALUE lda, VALUE beta, VALUE c, VALUE ldc) {
  static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE, const int n, const int k, const void* alpha, const void* a, const int lda, const void* beta, void* c, const int ldc) = {
    NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
    nm::math::atlas::cblas_syrk,
    nm::math::atlas::cblas_syrk,
    cblas_csyrk, cblas_zsyrk, // call directly, same function signature!
    NULL
  };

  nm::dtype_t dtype = NM_DTYPE(a);

  if (!ttable[dtype]) {
    rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
  } else {
    void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
         *pBeta  = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
    rubyval_to_cval(alpha, dtype, pAlpha);
    rubyval_to_cval(beta, dtype, pBeta);

    ttable[dtype](blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
  }

  return Qtrue;
}

/*
 * Call cblas_cherk/cblas_zherk as directly as possible: Hermitian rank-k
 * update for complex matrices only. In herk, alpha and beta are real scalars
 * even though the matrices are complex, hence NUM2DBL rather than a dtype
 * conversion. C is modified in-place; Qtrue is returned.
 */
static VALUE nm_atlas_cblas_herk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a, VALUE lda, VALUE beta, VALUE c, VALUE ldc) {
  nm::dtype_t dtype = NM_DTYPE(a);

  if (dtype == nm::COMPLEX64) {
    cblas_cherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
  } else if (dtype == nm::COMPLEX128) {
    cblas_zherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
  } else
    rb_raise(rb_eNotImpError, "this matrix operation undefined for non-complex dtypes");
  return Qtrue;
}

/*
 * Function signature conversion for calling CBLAS' gesvd functions as directly as possible.
 *
 * xGESVD computes the singular value decomposition (SVD) of a real
 * M-by-N matrix A, optionally computing the left and/or right singular
 * vectors. The SVD is written
 *
 *      A = U * SIGMA * transpose(V)
 *
 * where SIGMA is an M-by-N matrix which is zero except for its
 * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
 * V is an N-by-N orthogonal matrix.
The diagonal elements of SIGMA
 * are the singular values of A; they are real and non-negative, and
 * are returned in descending order. The first min(m,n) columns of
 * U and V are the left and right singular vectors of A.
 *
 * Note that the routine returns V**T, not V.
 */
static VALUE nm_atlas_lapack_gesvd(VALUE self, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lwork) {
  static int (*gesvd_table[nm::NUM_DTYPES])(char, char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* work, int, void* rwork) = {
    NULL, NULL, NULL, NULL, NULL, // no integer ops
    nm::math::atlas::lapack_gesvd,
    nm::math::atlas::lapack_gesvd,
    nm::math::atlas::lapack_gesvd,
    nm::math::atlas::lapack_gesvd,
    NULL // no Ruby objects
  };

  nm::dtype_t dtype = NM_DTYPE(a);

  if (!gesvd_table[dtype]) {
    rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
    return Qfalse;
  } else {
    int M = FIX2INT(m),
        N = FIX2INT(n);

    int min_mn = NM_MIN(M,N);
    int max_mn = NM_MAX(M,N);

    char JOBU  = lapack_svd_job_sym(jobu),
         JOBVT = lapack_svd_job_sym(jobvt);

    // only need rwork for complex matrices
    int rwork_size = (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) ? 5 * min_mn : 0;
    // rwork entries are real, but sizing by the (larger) complex element size
    // just over-allocates, which is harmless.
    void* rwork = rwork_size > 0 ? NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size) : NULL;

    int work_size = FIX2INT(lwork);

    // ignore user argument for lwork if it's too small.
    work_size = NM_MAX((dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? 2 * min_mn + max_mn : NM_MAX(3*min_mn + max_mn, 5*min_mn)), work_size);

    void* work = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);

    int info = gesvd_table[dtype](JOBU, JOBVT, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
      NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
      work, work_size, rwork);
    return INT2FIX(info);
  }
}

/*
 * Function signature conversion for calling CBLAS' gesdd functions as directly as possible.
 *
 * xGESDD uses a divide-and-conquer strategy to compute the singular value decomposition (SVD) of a real
 * M-by-N matrix A, optionally computing the left and/or right singular
 * vectors. The SVD is written
 *
 *      A = U * SIGMA * transpose(V)
 *
 * where SIGMA is an M-by-N matrix which is zero except for its
 * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
 * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
 * are the singular values of A; they are real and non-negative, and
 * are returned in descending order. The first min(m,n) columns of
 * U and V are the left and right singular vectors of A.
 *
 * Note that the routine returns V**T, not V.
 */
static VALUE nm_atlas_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE lwork) {
  static int (*gesdd_table[nm::NUM_DTYPES])(char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* work, int, int* iwork, void* rwork) = {
    NULL, NULL, NULL, NULL, NULL, // no integer ops
    nm::math::atlas::lapack_gesdd,
    nm::math::atlas::lapack_gesdd,
    nm::math::atlas::lapack_gesdd,
    nm::math::atlas::lapack_gesdd,
    NULL // no Ruby objects
  };

  nm::dtype_t dtype = NM_DTYPE(a);

  if (!gesdd_table[dtype]) {
    rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
    return Qfalse;
  } else {
    int M = FIX2INT(m),
        N = FIX2INT(n);

    int min_mn = NM_MIN(M,N);
    int max_mn = NM_MAX(M,N);

    char JOBZ = lapack_svd_job_sym(jobz);

    // only need rwork for complex matrices
    void* rwork = NULL;

    int work_size = FIX2INT(lwork); // Make sure we allocate enough work, regardless of the user request.
    if (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) {
      // RWORK sizing follows the zgesdd documentation's per-JOBZ minimums.
      int rwork_size = min_mn * (JOBZ == 'N' ? 5 : NM_MAX(5*min_mn + 7, 2*max_mn + 2*min_mn + 1));
      rwork = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size);

      if (JOBZ == 'N')      work_size = NM_MAX(work_size, 3*min_mn + NM_MAX(max_mn, 6*min_mn));
      else if (JOBZ == 'O') work_size = NM_MAX(work_size, 3*min_mn*min_mn + NM_MAX(max_mn, 5*min_mn*min_mn + 4*min_mn));
      else                  work_size = NM_MAX(work_size, 3*min_mn*min_mn + NM_MAX(max_mn, 4*min_mn*min_mn + 4*min_mn));
    } else {
      if (JOBZ == 'N')      work_size = NM_MAX(work_size, 2*min_mn + max_mn);
      else if (JOBZ == 'O') work_size = NM_MAX(work_size, 2*min_mn*min_mn + max_mn + 2*min_mn);
      else                  work_size = NM_MAX(work_size, min_mn*min_mn + max_mn + 2*min_mn);
    }
    void* work  = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
    int*  iwork = NM_ALLOCA_N(int, 8*min_mn); // integer workspace required by xGESDD

    int info = gesdd_table[dtype](JOBZ, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
      NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
      work, work_size, iwork, rwork);
    return INT2FIX(info);
  }
}

/*
 * Function signature conversion for calling CBLAS' geev functions as directly as possible.
 *
 * GEEV computes for an N-by-N real nonsymmetric matrix A, the
 * eigenvalues and, optionally, the left and/or right eigenvectors.
 *
 * The right eigenvector v(j) of A satisfies
 *                    A * v(j) = lambda(j) * v(j)
 * where lambda(j) is its eigenvalue.
 *
 * The left eigenvector u(j) of A satisfies
 *                 u(j)**H * A = lambda(j) * u(j)**H
 * where u(j)**H denotes the conjugate transpose of u(j).
 *
 * The computed eigenvectors are normalized to have Euclidean norm
 * equal to 1 and largest component real.
*/
static VALUE nm_atlas_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr, VALUE lwork) {
  static int (*geev_table[nm::NUM_DTYPES])(char, char, int, void* a, int, void* w, void* wi, void* vl, int, void* vr, int, void* work, int, void* rwork) = {
    NULL, NULL, NULL, NULL, NULL, // no integer ops
    nm::math::atlas::lapack_geev,
    nm::math::atlas::lapack_geev,
    nm::math::atlas::lapack_geev,
    nm::math::atlas::lapack_geev,
    NULL // no Ruby objects
  };

  nm::dtype_t dtype = NM_DTYPE(a);

  if (!geev_table[dtype]) {
    rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
    return Qfalse;
  } else {
    int N = FIX2INT(n);

    char JOBVL = lapack_evd_job_sym(compute_left),
         JOBVR = lapack_evd_job_sym(compute_right);

    void* A  = NM_STORAGE_DENSE(a)->elements;
    void* WR = NM_STORAGE_DENSE(w)->elements;
    void* WI = wi == Qnil ? NULL : NM_STORAGE_DENSE(wi)->elements;
    // Eigenvector outputs are only touched when the corresponding job is 'V'.
    void* VL = JOBVL == 'V' ? NM_STORAGE_DENSE(vl)->elements : NULL;
    void* VR = JOBVR == 'V' ? NM_STORAGE_DENSE(vr)->elements : NULL;

    // only need rwork for complex matrices (wi == Qnil for complex)
    int rwork_size = dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? N * DTYPE_SIZES[dtype] : 0; // 2*N*floattype for complex only, otherwise 0
    void* rwork = rwork_size > 0 ? NM_ALLOCA_N(char, rwork_size) : NULL;

    int work_size = FIX2INT(lwork);
    void* work;

    int info;

    // if work size is 0 or -1, query.
    if (work_size <= 0) {
      work_size = -1;
      // One element is enough to receive the optimal-lwork query result.
      work = NM_ALLOC_N(char, DTYPE_SIZES[dtype]); //2*N * DTYPE_SIZES[dtype]);

      info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);

      // NOTE(review): the reinterpret_cast target types here appear to have
      // been stripped during extraction (presumably float*/double* for the
      // single/double-precision cases); confirm against upstream.
      work_size = (int)(dtype == nm::COMPLEX64 || dtype == nm::FLOAT32 ? reinterpret_cast(work)[0] : reinterpret_cast(work)[0]);
      // line above is basically: work_size = (int)(work[0]);
      // now have new work_size
      NM_FREE(work);

      if (info == 0) rb_warn("geev: calculated optimal lwork of %d; to eliminate this message, use a positive value for lwork (at least 2*shape[i])", work_size);
      else return INT2FIX(info); // error of some kind on query!
    }

    // if work size is < 2*N, just set it to 2*N
    if (work_size < 2*N) work_size = 2*N;
    if (work_size < 3*N && (dtype == nm::FLOAT32 || dtype == nm::FLOAT64)) {
      // Real xGEEV needs 4*N workspace when eigenvectors are requested, 3*N otherwise.
      work_size = JOBVL == 'V' || JOBVR == 'V' ? 4*N : 3*N;
    }

    // Allocate work array for actual run
    work = NM_ALLOCA_N(char, work_size * DTYPE_SIZES[dtype]);

    // Perform the actual calculation.
    info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);

    return INT2FIX(info);
  }
}

/* Call any of the clapack_xgetrf functions as directly as possible.
 *
 * The clapack_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N
 * matrix A using partial pivoting with row interchanges.
 *
 * The factorization has the form:
 *    A = P * L * U
 * where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n),
 * and U is upper triangular (upper trapezoidal if m < n).
 *
 * This is the right-looking level 3 BLAS version of the algorithm.
 *
 * == Arguments
 * See: http://www.netlib.org/lapack/double/dgetrf.f
 * (You don't need argument 5; this is the value returned by this function.)
 *
 * You probably don't want to call this function. Instead, why don't you try clapack_getrf, which is more flexible
 * with its arguments?
 *
 * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
 * handling, so you can easily crash Ruby!
 *
 * Returns an array giving the pivot indices (normally these are argument #5).
*/
static VALUE nm_atlas_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda) {
  static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int m, const int n, void* a, const int lda, int* ipiv) = {
    NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
    nm::math::atlas::clapack_getrf,
    nm::math::atlas::clapack_getrf,
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
    clapack_cgetrf, clapack_zgetrf, // call directly, same function signature!
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
    nm::math::atlas::clapack_getrf,
    nm::math::atlas::clapack_getrf,
#endif
    nm::math::atlas::clapack_getrf
  };

  int M = FIX2INT(m),
      N = FIX2INT(n);

  // Allocate the pivot index array, which is of size MIN(M, N).
  size_t ipiv_size = std::min(M,N);
  int* ipiv = NM_ALLOCA_N(int, ipiv_size);

  if (!ttable[NM_DTYPE(a)]) {
    rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
  } else {
    // Call either our version of getrf or the LAPACK version.
    ttable[NM_DTYPE(a)](blas_order_sym(order), M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv);
  }

  // Result will be stored in a. We return ipiv as an array.
  VALUE ipiv_array = rb_ary_new2(ipiv_size);
  for (size_t i = 0; i < ipiv_size; ++i) {
    rb_ary_store(ipiv_array, i, INT2FIX(ipiv[i]));
  }

  return ipiv_array;
}

/* Call any of the clapack_xpotrf functions as directly as possible.
 *
 * Computes the Cholesky factorization of a symmetric (Hermitian) positive-definite
 * matrix A in-place; returns +a+.
 *
 * You probably don't want to call this function. Instead, why don't you try clapack_potrf, which is more flexible
 * with its arguments?
 *
 * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
 * handling, so you can easily crash Ruby!
 */
static VALUE nm_atlas_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
#if !defined(HAVE_CLAPACK_H) && !defined(HAVE_ATLAS_CLAPACK_H)
  // rb_raise does not return, so without CLAPACK everything below is unreachable.
  rb_raise(rb_eNotImpError, "potrf currently requires CLAPACK");
#endif

  static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
    NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
    nm::math::atlas::clapack_potrf,
    nm::math::atlas::clapack_potrf,
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
    clapack_cpotrf, clapack_zpotrf, // call directly, same function signature!
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
    nm::math::atlas::clapack_potrf,
    nm::math::atlas::clapack_potrf,
#endif
    NULL
  };

  if (!ttable[NM_DTYPE(a)]) {
    rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
    // FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
    //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
  } else {
    // Call either our version of potrf or the LAPACK version.
    ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
  }

  return a;
}

/*
 * Call any of the clapack_xgetrs functions as directly as possible.
*/
static VALUE nm_atlas_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb) {
  static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans, const int N, const int NRHS, const void* A, const int lda, const int* ipiv, void* B, const int ldb) = {
    NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
    nm::math::atlas::clapack_getrs,
    nm::math::atlas::clapack_getrs,
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
    clapack_cgetrs, clapack_zgetrs, // call directly, same function signature!
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
    nm::math::atlas::clapack_getrs,
    nm::math::atlas::clapack_getrs,
#endif
    nm::math::atlas::clapack_getrs
  };

  // Allocate the C version of the pivot index array
  int* ipiv_;
  if (TYPE(ipiv) != T_ARRAY) {
    rb_raise(rb_eArgError, "ipiv must be of type Array");
  } else {
    ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
    for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
      ipiv_[index] = FIX2INT( RARRAY_AREF(ipiv, index) );
    }
  }

  if (!ttable[NM_DTYPE(a)]) {
    rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
  } else {
    // Call either our version of getrs or the LAPACK version.
    ttable[NM_DTYPE(a)](blas_order_sym(order), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv_, NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
  }

  // b is both returned and modified directly in the argument list.
  return b;
}

/*
 * Call any of the clapack_xpotrs functions as directly as possible.
 */
static VALUE nm_atlas_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb) {
  static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const int NRHS, const void* A, const int lda, void* B, const int ldb) = {
    NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
    nm::math::atlas::clapack_potrs,
    nm::math::atlas::clapack_potrs,
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
    clapack_cpotrs, clapack_zpotrs, // call directly, same function signature!
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
    nm::math::atlas::clapack_potrs,
    nm::math::atlas::clapack_potrs,
#endif
    nm::math::atlas::clapack_potrs
  };

  if (!ttable[NM_DTYPE(a)]) {
    rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
  } else {
    // Call either our version of potrs or the LAPACK version.
    ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
  }

  // b is both returned and modified directly in the argument list.
  return b;
}

/* Call any of the clapack_xgetri functions as directly as possible.
 *
 * You probably don't want to call this function. Instead, why don't you try clapack_getri, which is more flexible
 * with its arguments?
 *
 * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
 * handling, so you can easily crash Ruby!
 *
 * Returns an array giving the pivot indices (normally these are argument #5).
 */
// Ruby-visible front-end for clapack_xgetri: computes the inverse of a matrix
// from its LU factorization (as produced by getrf). `a` is inverted in place
// and returned. Raises NotImplementedError when no CLAPACK header was found.
static VALUE nm_atlas_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv) {
#if !defined (HAVE_CLAPACK_H) && !defined (HAVE_ATLAS_CLAPACK_H)
  rb_raise(rb_eNotImpError, "getri currently requires CLAPACK");
#endif

  // Dispatch table indexed by dtype. Note the final entry (RubyObject) is
  // NULL: getri is not implemented for that dtype.
  // NOTE(review): the explicit <dtype> template-argument lists on the
  // nm::math::atlas::clapack_getri entries appear to have been lost in
  // extraction — confirm against the repository before compiling.
  static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int n, void* a, const int lda, const int* ipiv) = {
      NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
      nm::math::atlas::clapack_getri,
      nm::math::atlas::clapack_getri,
#if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H)
      clapack_cgetri, clapack_zgetri, // call directly, same function signature!
#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
      nm::math::atlas::clapack_getri,
      nm::math::atlas::clapack_getri,
#endif
      NULL
  };

  // Allocate the C version of the pivot index array
  int* ipiv_;
  if (TYPE(ipiv) != T_ARRAY) {
    rb_raise(rb_eArgError, "ipiv must be of type Array");
  } else {
    // alloca-backed: freed automatically when this function returns.
    ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
    for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
      ipiv_[index] = FIX2INT( RARRAY_AREF(ipiv, index) );
    }
  }

  if (!ttable[NM_DTYPE(a)]) {
    rb_raise(nm_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
    // FIXME: Once non-BLAS dtypes are implemented, replace error above with the error below.
    //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
  } else {
    // Call either our version of getri or the LAPACK version.
    ttable[NM_DTYPE(a)](blas_order_sym(order), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv_);
  }

  return a;
}

/* Call any of the clapack_xpotri functions as directly as possible.
 *
 * You probably don't want to call this function. Instead, why don't you try clapack_potri, which is more flexible
 * with its arguments?
 *
 * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
 * handling, so you can easily crash Ruby!
*/ static VALUE nm_atlas_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) { #if !defined (HAVE_CLAPACK_H) && !defined (HAVE_ATLAS_CLAPACK_H) rb_raise(rb_eNotImpError, "getri currently requires CLAPACK"); #endif static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = { NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division nm::math::atlas::clapack_potri, nm::math::atlas::clapack_potri, #if defined (HAVE_CLAPACK_H) || defined (HAVE_ATLAS_CLAPACK_H) clapack_cpotri, clapack_zpotri, // call directly, same function signature! #else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface. nm::math::atlas::clapack_potri, nm::math::atlas::clapack_potri, #endif NULL }; if (!ttable[NM_DTYPE(a)]) { rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes"); // FIXME: Once BLAS dtypes are implemented, replace error above with the error below. //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices"); } else { // Call either our version of getri or the LAPACK version. ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda)); } return a; } /* * Call any of the clapack_xlaswp functions as directly as possible. * * Note that LAPACK's xlaswp functions accept a column-order matrix, but NMatrix uses row-order. Thus, n should be the * number of rows and lda should be the number of columns, no matter what it says in the documentation for dlaswp.f. */ static VALUE nm_atlas_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx) { //We have actually never used the ATLAS version of laswp. For the time being //I will leave it like that and just always call the internal implementation. //I don't know if there is a good reason for this or not. 
//Maybe because our internal version swaps columns instead of rows. static void (*ttable[nm::NUM_DTYPES])(const int n, void* a, const int lda, const int k1, const int k2, const int* ipiv, const int incx) = { nm::math::clapack_laswp, nm::math::clapack_laswp, nm::math::clapack_laswp, nm::math::clapack_laswp, nm::math::clapack_laswp, nm::math::clapack_laswp, nm::math::clapack_laswp, nm::math::clapack_laswp, nm::math::clapack_laswp, nm::math::clapack_laswp }; // Allocate the C version of the pivot index array int* ipiv_; if (TYPE(ipiv) != T_ARRAY) { rb_raise(rb_eArgError, "ipiv must be of type Array"); } else { ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv)); for (int index = 0; index < RARRAY_LEN(ipiv); ++index) { ipiv_[index] = FIX2INT( RARRAY_AREF(ipiv, index) ); } } // Call either our version of laswp or the LAPACK version. ttable[NM_DTYPE(a)](FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), FIX2INT(k1), FIX2INT(k2), ipiv_, FIX2INT(incx)); // a is both returned and modified directly in the argument list. return a; } } ================================================ FILE: ext/nmatrix_atlas/nmatrix_atlas.cpp ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. 
// // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == nmatrix_atlas.cpp // // Main file for nmatrix_atlas extension // #include #include "nmatrix.h" #include "math_atlas/inc.h" #include "data/data.h" extern "C" { void nm_math_init_atlas(); void Init_nmatrix_atlas() { nm_math_init_atlas(); } } ================================================ FILE: ext/nmatrix_fftw/extconf.rb ================================================ # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == nmatrix_fftw/extconf.rb # # This file checks FFTW3 and other necessary headers/shared objects. 
require 'nmatrix/mkmf' fftw_libdir = RbConfig::CONFIG['libdir'] fftw_incdir = RbConfig::CONFIG['includedir'] fftw_srcdir = RbConfig::CONFIG['srcdir'] $CFLAGS = ["-Wall -Werror=return-type -I$(srcdir)/../nmatrix -I$(srcdir)/lapacke/include",$CFLAGS].join(" ") $CXXFLAGS = ["-Wall -Werror=return-type -I$(srcdir)/../nmatrix -I$(srcdir)/lapacke/include -std=c++11",$CXXFLAGS].join(" ") $CPPFLAGS = ["-Wall -Werror=return-type -I$(srcdir)/../nmatrix -I$(srcdir)/lapacke/include -std=c++11",$CPPFLAGS].join(" ") flags = " --include=#{fftw_incdir} --libdir=#{fftw_libdir}" if have_library("fftw3") $CFLAGS += [" -lfftw3 -lm #{$CFLAGS} #{$flags}"].join(" ") dir_config('nmatrix_fftw', fftw_incdir, fftw_libdir) dir_config('nmatrix_fftw') end create_conf_h("nmatrix_fftw_config.h") create_makefile("nmatrix_fftw") # to clean up object files in subdirectories: open('Makefile', 'a') do |f| clean_objs_paths = %w{ }.map { |d| "#{d}/*.#{CONFIG["OBJEXT"]}" } f.write("CLEANOBJS := $(CLEANOBJS) #{clean_objs_paths.join(' ')}") end ================================================ FILE: ext/nmatrix_fftw/nmatrix_fftw.cpp ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. 
// // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == nmatrix_fftw.cpp // // Main file for nmatrix_fftw extension // #include #include #include #include "storage/common.h" #include "nmatrix.h" #include #define TYPE_COMPLEX_COMPLEX 0 #define TYPE_REAL_COMPLEX 1 #define TYPE_COMPLEX_REAL 2 #define TYPE_REAL_REAL 3 // @private Used internally by the C API. static VALUE cNMatrix_FFTW_Plan_Data; // @private Used internally by the C API. // // ADT for encapsulating various data structures required for sucessfully planning // and executing a fourier transform with FFTW. Uses void* pointers because // input/output can be either double or fftw_complex depending on the type of // FFT being planned. struct fftw_data { void* input; void* output; fftw_plan plan; }; // @private Used internally by the C API. // Method used by Ruby GC for freeing memory allocated by FFTW. static void nm_fftw_cleanup(fftw_data* d) { xfree(d->input); xfree(d->output); fftw_destroy_plan(d->plan); xfree(d); } // @private Used internally by the C API. // Used for converting a Ruby Array containing the shape to a C++ array of ints. static int* nm_fftw_interpret_shape(VALUE rb_shape, const int dimensions) { Check_Type(rb_shape, T_ARRAY); int *shape = new int[dimensions]; const VALUE *arr = RARRAY_CONST_PTR(rb_shape); for (int i = 0; i < dimensions; ++i) { shape[i] = FIX2INT(arr[i]); } return shape; } // @private Used internally by the C API. // Convert values passed in Ruby Array containing kinds of real-real transforms // to a C array of ints. static void nm_fftw_interpret_real_real_kind(VALUE real_real_kind, int *r2r_kinds) { int size = RARRAY_LEN(real_real_kind); const VALUE *a = RARRAY_CONST_PTR(real_real_kind); for (int i = 0; i < size; ++i) { r2r_kinds[i] = FIX2INT(a[i]); } } // @private Used internally by the C API. 
// Actually calls the FFTW planner routines based on the input/output and the // type of routine selected. Also allocates memory for input and output pointers. static void nm_fftw_actually_create_plan(fftw_data* data, size_t size, const int dimensions, const int* shape, int sign, unsigned flags, VALUE rb_type, VALUE real_real_kind) { switch (FIX2INT(rb_type)) { case TYPE_COMPLEX_COMPLEX: data->input = ALLOC_N(fftw_complex, size); data->output = ALLOC_N(fftw_complex, size); data->plan = fftw_plan_dft(dimensions, shape, (fftw_complex*)data->input, (fftw_complex*)data->output, sign, flags); break; case TYPE_REAL_COMPLEX: data->input = ALLOC_N(double , size); data->output = ALLOC_N(fftw_complex, size); data->plan = fftw_plan_dft_r2c(dimensions, shape, (double*)data->input, (fftw_complex*)data->output, flags); break; case TYPE_COMPLEX_REAL: data->input = ALLOC_N(fftw_complex, size); data->output = ALLOC_N(double , size); data->plan = fftw_plan_dft_c2r(dimensions, shape, (fftw_complex*)data->input, (double*)data->output, flags); break; case TYPE_REAL_REAL: int* r2r_kinds = ALLOC_N(int, FIX2INT(real_real_kind)); nm_fftw_interpret_real_real_kind(real_real_kind, r2r_kinds); data->input = ALLOC_N(double, size); data->output = ALLOC_N(double, size); data->plan = fftw_plan_r2r(dimensions, shape, (double*)data->input, (double*)data->output, (fftw_r2r_kind*)r2r_kinds, flags); xfree(r2r_kinds); break; } } /** \brief Create a plan for performing the fourier transform based on input, * output pointers and the underlying hardware. * * @param[in] self Object on which the function is called * @param[in] rb_shape Shape of the plan. * @param[in] rb_size Size of the plan. * @param[in] rb_dim Dimension of the FFT to be performed. * @param[in] rb_flags Number denoting the planner flags. * @param[in] rb_direction Direction of FFT (can be -1 or +1). Specifies the * sign of the exponent. 
* @param[in] rb_type Number specifying the type of FFT being planned (one * of :complex_complex, :complex_real, :real_complex and :real_real) * @param[in] rb_real_real_kind Ruby Array specifying the kind of DFT to perform over * each axis in case of a real input/real output FFT. * * \returns An object of type NMatrix::FFTW::Plan::Data that encapsulates the * plan and relevant input/output arrays. */ static VALUE nm_fftw_create_plan(VALUE self, VALUE rb_shape, VALUE rb_size, VALUE rb_dim, VALUE rb_flags, VALUE rb_direction, VALUE rb_type, VALUE rb_real_real_kind) { const int dimensions = FIX2INT(rb_dim); const int* shape = nm_fftw_interpret_shape(rb_shape, dimensions); size_t size = FIX2INT(rb_size); int sign = FIX2INT(rb_direction); unsigned flags = FIX2INT(rb_flags); fftw_data *data = ALLOC(fftw_data); nm_fftw_actually_create_plan(data, size, dimensions, shape, sign, flags, rb_type, rb_real_real_kind); return Data_Wrap_Struct(cNMatrix_FFTW_Plan_Data, NULL, nm_fftw_cleanup, data); } // @private Used internally by the C API. template static void nm_fftw_actually_set(VALUE nmatrix, VALUE plan_data) { fftw_data* data; Data_Get_Struct(plan_data, fftw_data, data); memcpy((InputType*)data->input, (InputType*)NM_DENSE_ELEMENTS(nmatrix), sizeof(InputType)*NM_DENSE_COUNT(nmatrix)); } /** \brief Here is a brief description of what this function does. * * @param[in,out] self Object on which the function is called. * @param[in] plan_data An internal data structure of type * NMatrix::FFTW::Plan::Data that is created by Data_Wrap_Struct in * nm_fftw_create_plan and which encapsulates the FFTW plan in a Ruby object. * @param[in] nmatrix An NMatrix object (pre-allocated) which contains the * input elements for the fourier transform. * @param[in] type A number representing the type of fourier transform * being performed. (:complex_complex, :real_complex, :complex_real or :real_real). 
* * \returns self */ static VALUE nm_fftw_set_input(VALUE self, VALUE nmatrix, VALUE plan_data, VALUE type) { switch(FIX2INT(type)) { case TYPE_COMPLEX_COMPLEX: case TYPE_COMPLEX_REAL: nm_fftw_actually_set(nmatrix, plan_data); break; case TYPE_REAL_COMPLEX: case TYPE_REAL_REAL: nm_fftw_actually_set(nmatrix, plan_data); break; default: rb_raise(rb_eArgError, "Invalid type of DFT."); } return self; } // @private Used internally by the C API. // Call fftw_execute and copy the resulting data into the nmatrix object. template static void nm_fftw_actually_execute(VALUE nmatrix, VALUE plan_data) { fftw_data *data; Data_Get_Struct(plan_data, fftw_data, data); fftw_execute(data->plan); memcpy((OutputType*)NM_DENSE_ELEMENTS(nmatrix), (OutputType*)data->output, sizeof(OutputType)*NM_DENSE_COUNT(nmatrix)); } /** \brief Executes the fourier transform by calling the fftw_execute function * and copies the output to the output nmatrix object, which can be accessed from * Ruby. * * @param[in] self Object on which the function is called. * @param[in] plan_data An internal data structure of type * NMatrix::FFTW::Plan::Data that is created by Data_Wrap_Struct in * nm_fftw_create_plan and which encapsulates the FFTW plan in a Ruby object. * @param[in] nmatrix An NMatrix object (pre-allocated) into which the computed * data will be copied. * @param[in] type A number representing the type of fourier transform being * performed. (:complex_complex, :real_complex, :complex_real or :real_real). * * \returns TrueClass if computation completed without errors. 
*/ static VALUE nm_fftw_execute(VALUE self, VALUE nmatrix, VALUE plan_data, VALUE type) { switch(FIX2INT(type)) { case TYPE_COMPLEX_COMPLEX: case TYPE_REAL_COMPLEX: nm_fftw_actually_execute(nmatrix, plan_data); break; case TYPE_COMPLEX_REAL: case TYPE_REAL_REAL: nm_fftw_actually_execute(nmatrix, plan_data); break; default: rb_raise(rb_eTypeError, "Invalid type of DFT."); } return Qtrue; } extern "C" { void Init_nmatrix_fftw() { VALUE cNMatrix = rb_define_class("NMatrix", rb_cObject); VALUE cNMatrix_FFTW = rb_define_module_under(cNMatrix, "FFTW"); VALUE cNMatrix_FFTW_Plan = rb_define_class_under(cNMatrix_FFTW, "Plan", rb_cObject); VALUE cNMatrix_FFTW_Plan_Data = rb_define_class_under( cNMatrix_FFTW_Plan, "Data", rb_cObject); rb_define_private_method(cNMatrix_FFTW_Plan, "c_create_plan", (METHOD)nm_fftw_create_plan, 7); rb_define_private_method(cNMatrix_FFTW_Plan, "c_set_input", (METHOD)nm_fftw_set_input, 3); rb_define_private_method(cNMatrix_FFTW_Plan, "c_execute", (METHOD)nm_fftw_execute, 3); } } ================================================ FILE: ext/nmatrix_java/nmatrix/math/MathHelper.java ================================================ import org.apache.commons.math3.util.FastMath; import org.apache.commons.math3.special.Erf; import org.apache.commons.math3.special.Gamma; public class MathHelper{ public static double[] log(double base, double[] arr){ double[] result = new double[arr.length]; for(int i = 0; i< arr.length; i++){ result[i] = FastMath.log(base, arr[i]); } return result; } public static double[] erf(double[] arr){ double[] result = new double[arr.length]; for(int i = 0; i< arr.length; i++){ result[i] = Erf.erf(arr[i]); } return result; } public static double[] erfc(double[] arr){ double[] result = new double[arr.length]; for(int i = 0; i< arr.length; i++){ result[i] = Erf.erfc(arr[i]); } return result; } public static double[] gamma(double[] arr){ double[] result = new double[arr.length]; for(int i = 0; i< arr.length; i++){ result[i] = 
Gamma.gamma(arr[i]);
    }
    return result;
  }
  // Element-wise round; Math.round returns long, widened back to double here.
  public static double[] round(double[] arr){
    double[] result = new double[arr.length];
    for(int i = 0; i< arr.length; i++){
      result[i] = Math.round(arr[i]);
    }
    return result;
  }
  // Element-wise arr1[i] * 2**arr[i].
  public static double[] ldexp(double[] arr1, double[] arr){
    double[] result = new double[arr1.length];
    for(int i = 0; i< arr1.length; i++){
      result[i] = arr1[i] * Math.pow(2, arr[i]);
    }
    return result;
  }
  // Scalar mantissa: val * 2**arr[i].
  public static double[] ldexpScalar(double val, double[] arr){
    double[] result = new double[arr.length];
    for(int i = 0; i< arr.length; i++){
      result[i] = val * Math.pow(2, arr[i]);
    }
    return result;
  }
  // Scalar exponent: arr[i] * 2**val.
  public static double[] ldexpScalar2(double val, double[] arr){
    double[] result = new double[arr.length];
    for(int i = 0; i< arr.length; i++){
      result[i] = arr[i] * Math.pow(2, val);
    }
    return result;
  }
  // Element-wise sqrt(arr1[i]^2 + arr2[i]^2).
  // NOTE(review): Math.hypot would avoid intermediate overflow/underflow for
  // extreme magnitudes — consider switching if that matters to callers.
  public static double[] hypot(double[] arr1, double[] arr2){
    double[] result = new double[arr1.length];
    for(int i = 0; i< arr1.length; i++){
      result[i] = Math.sqrt(arr2[i] * arr2[i] + arr1[i] * arr1[i]);
    }
    return result;
  }
  // Element-wise sqrt(arr[i]^2 + val^2).
  public static double[] hypotScalar(double val, double[] arr){
    double[] result = new double[arr.length];
    for(int i = 0; i< arr.length; i++){
      result[i] = Math.sqrt(arr[i] * arr[i] + val * val);
    }
    return result;
  }
  // Element-wise atan2 with arr2 as y and arr1 as x.
  // NOTE(review): argument order (arr2[i], arr1[i]) is intentional-looking but
  // unconventional — confirm against the Ruby-side caller.
  public static double[] atan2(double[] arr1, double[] arr2){
    double[] result = new double[arr1.length];
    for(int i = 0; i< arr1.length; i++){
      result[i] = Math.atan2(arr2[i], arr1[i]);
    }
    return result;
  }
  // Element-wise atan2(val, arr[i]) — scalar y.
  public static double[] atan2Scalar(double val, double[] arr){
    double[] result = new double[arr.length];
    for(int i = 0; i< arr.length; i++){
      result[i] = Math.atan2(val, arr[i]);
    }
    return result;
  }
  // Element-wise atan2(arr[i], val) — scalar x.
  public static double[] atan2Scalar2(double val, double[] arr){
    double[] result = new double[arr.length];
    for(int i = 0; i< arr.length; i++){
      result[i] = Math.atan2(arr[i], val);
    }
    return result;
  }
}

================================================
FILE: ext/nmatrix_java/nmatrix/storage/dense/ArrayComparator.java
================================================

// Approximate element-wise comparison of dense storage backing arrays.
public class ArrayComparator{
  // Returns true when arr1 and arr2 have the same length and every pair of
  // elements differs by at most the fixed absolute tolerance 1e-3.
  public static boolean equals(double[] arr1, double[] arr2){
    // BUGFIX: the original indexed arr2 with arr1's length. Differing lengths
    // either threw ArrayIndexOutOfBoundsException (arr2 shorter) or silently
    // ignored arr2's tail (arr2 longer). Arrays of different lengths are not equal.
    if (arr1.length != arr2.length){
      return false;
    }
    double delta = 1e-3;
    for(int i=0; i < arr1.length; i++){
      if(Math.abs(arr1[i] - arr2[i]) > delta){
        return false;
      }
    }
    return true;
  }
}

================================================
FILE: ext/nmatrix_java/nmatrix/util/ArrayGenerator.java
================================================

// Helpers that flatten 2-D matrices into 1-D arrays (and narrow dtypes) for
// the JRuby dense storage backend.
public class ArrayGenerator{
  // Array from Matrix begin
  // Flatten a row x col matrix into a 1-D array, row-major order.
  public static double[] getArrayDouble(double[][] matrix, int row, int col) {
    double[] array = new double[row * col];
    for (int index=0, i=0; i < row ; i++){
      for (int j=0; j < col; j++){
        array[index] = matrix[i][j];
        index++;
      }
    }
    return array;
  }

  // Flatten a row x col float matrix into a 1-D array, row-major order.
  public static float[] getArrayFloat(float[][] matrix, int row, int col) {
    float[] array = new float[row * col];
    for (int index=0, i=0; i < row ; i++){
      for (int j=0; j < col; j++){
        array[index] = matrix[i][j];
        index++;
      }
    }
    return array;
  }

  // Flatten a col x row matrix into a 1-D array (column-major variant: the
  // outer loop walks the first dimension, labelled col).
  public static double[] getArrayColMajorDouble(double[][] matrix, int col, int row) {
    double[] array = new double[row * col];
    for (int index=0, i=0; i < col ; i++){
      for (int j=0; j < row; j++){
        array[index] = matrix[i][j];
        index++;
      }
    }
    return array;
  }

  // Float counterpart of getArrayColMajorDouble.
  public static float[] getArrayColMajorFloat(float[][] matrix, int col, int row) {
    float[] array = new float[row * col];
    for (int index=0, i=0; i < col ; i++){
      for (int j=0; j < row; j++){
        array[index] = matrix[i][j];
        index++;
      }
    }
    return array;
  }

  // Flatten a double matrix while narrowing each element to float.
  public static float[] getArrayFloatFromDouble(double[][] matrix, int row, int col) {
    float[] array = new float[row * col];
    for (int index=0, i=0; i < row ; i++){
      for (int j=0; j < col; j++){
        array[index] = (float)matrix[i][j];
        index++;
      }
    }
    return array;
  }
  // Array from Matrix end

  // typeCast begin
  // Element-wise narrowing copy from double[] to float[].
  public static float[] convertArrayFloatFromDouble(double[] array){
    float[] resultArray = new float[array.length];
    for (int i=0; i < array.length ; i++){
      // BUGFIX: the original wrote the cast back into `array[i]` and returned
      // an all-zero resultArray.
      resultArray[i] = (float)array[i];
    }
    return resultArray;
  }
  // typeCast end
}
================================================
FILE: ext/nmatrix_java/nmatrix/util/MatrixGenerator.java
================================================

// Inverse of ArrayGenerator: rebuilds 2-D matrices from flat 1-D arrays.
public class MatrixGenerator
{
  // Matrix from Array begin
  // Unflatten a 1-D array into a row x col float matrix, row-major order.
  public static float[][] getMatrixFloat(float[] array, int row, int col)
  {
    float[][] matrix = new float[row][col];
    for (int index=0, i=0; i < row ; i++){
      for (int j=0; j < col; j++){
        matrix[i][j]= array[index];
        index++;
      }
    }
    return matrix;
  }

  // Unflatten a 1-D array into a row x col double matrix, row-major order.
  public static double[][] getMatrixDouble(double[] array, int row, int col)
  {
    double[][] matrix = new double[row][col];
    for (int index=0, i=0; i < row ; i++){
      for (int j=0; j < col; j++){
        matrix[i][j]= array[index];
        index++;
      }
    }
    return matrix;
  }

  // Column-major variant: allocates col x row and walks col in the outer loop.
  // NOTE(review): the copy order is identical to the row-major version with
  // swapped parameter labels — confirm the intended layout against callers.
  public static float[][] getMatrixColMajorFloat(float[] array, int col, int row)
  {
    float[][] matrix = new float[col][row];
    for (int index=0, i=0; i < col ; i++){
      for (int j=0; j < row; j++){
        matrix[i][j]= array[index];
        index++;
      }
    }
    return matrix;
  }
  // Matrix from Array end
}

================================================
FILE: ext/nmatrix_java/nmatrix/util/WrapperType.java
================================================
//http://stackoverflow.com/questions/709961/determining-if-an-object-is-of-primitive-type

import java.util.*;

public class WrapperType
{
  // How-to-use?
// public static void main(String[] args) // { Object o = 1; // System.out.println(isWrapperType(String.class)); // System.out.println(isWrapperType(o.getClass())); // } private static final Set> WRAPPER_TYPES = getWrapperTypes(); public static boolean isWrapperType(Class clazz) { return WRAPPER_TYPES.contains(clazz); } private static Set> getWrapperTypes() { Set> ret = new HashSet>(); ret.add(Boolean.class); ret.add(Character.class); ret.add(Byte.class); ret.add(Short.class); ret.add(Integer.class); ret.add(Long.class); ret.add(Float.class); ret.add(Double.class); ret.add(Void.class); return ret; } } ================================================ FILE: ext/nmatrix_java/test/AssertTests.java ================================================ import static org.hamcrest.CoreMatchers.allOf; import static org.hamcrest.CoreMatchers.anyOf; import static org.hamcrest.CoreMatchers.both; import static org.hamcrest.CoreMatchers.containsString; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.everyItem; import static org.hamcrest.CoreMatchers.hasItems; import static org.hamcrest.CoreMatchers.not; import static org.hamcrest.CoreMatchers.sameInstance; import static org.hamcrest.CoreMatchers.startsWith; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotSame; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertSame; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import java.util.Arrays; import org.hamcrest.core.CombinableMatcher; import org.junit.Test; public class AssertTests { @Test public void testNMatrixdtypeEquals() { assertEquals("failure - dtypes are not equal", "tex", "text"); } @Test public void testAssertArrayEquals() { byte[] expected = "trial".getBytes(); byte[] actual = "trial".getBytes(); 
    assertArrayEquals("failure - byte arrays not same", expected, actual);
  }

  // NOTE(review): as in testNMatrixdtypeEquals, "tex" vs "text" fails when run.
  @Test
  public void testAssertEquals() {
    assertEquals("failure - strings are not equal", "tex", "text");
  }

  @Test
  public void testAssertFalse() {
    assertFalse("failure - should be false", false);
  }

  @Test
  public void testAssertNotNull() {
    assertNotNull("should not be null", new Object());
  }

  @Test
  public void testAssertNotSame() {
    assertNotSame("should not be same Object", new Object(), new Object());
  }

  @Test
  public void testAssertNull() {
    assertNull("should be null", null);
  }

  @Test
  public void testAssertSame() {
    Integer aNumber = Integer.valueOf(768);
    assertSame("should be same", aNumber, aNumber);
  }

  // JUnit Matchers assertThat
  @Test
  public void testAssertThatBothContainsString() {
    assertThat("albumen", both(containsString("a")).and(containsString("b")));
  }

  @Test
  public void testAssertThatHasItems() {
    assertThat(Arrays.asList("one", "two", "three"), hasItems("one", "three"));
  }

  @Test
  public void testAssertThatEveryItemContainsString() {
    assertThat(Arrays.asList(new String[] { "fun", "ban", "net" }), everyItem(containsString("n")));
  }

  // Core Hamcrest Matchers with assertThat
  // NOTE(review): the explicit type witness on CombinableMatcher below
  // (presumably CombinableMatcher.<Integer>either) was lost in extraction.
  @Test
  public void testAssertThatHamcrestCoreMatchers() {
    assertThat("good", allOf(equalTo("good"), startsWith("good")));
    assertThat("good", not(allOf(equalTo("bad"), equalTo("good"))));
    assertThat("good", anyOf(equalTo("bad"), equalTo("good")));
    assertThat(7, not(CombinableMatcher.
either(equalTo(3)).or(equalTo(4))));
    assertThat(new Object(), not(sameInstance(new Object())));
  }

  @Test
  public void testAssertTrue() {
    assertTrue("failure - should be true", true);
  }
}

================================================
FILE: ext/nmatrix_java/test/TestRunner.java
================================================

// Minimal manual runner: invokes two AssertTests methods directly, without a
// JUnit runner (so @Test/@Before lifecycle and other tests are skipped).
public class TestRunner{
  public static void main(String[] args)
  {
    AssertTests test1 = new AssertTests();
    test1.testAssertArrayEquals();
    test1.testAssertEquals();
  }
}

================================================
FILE: ext/nmatrix_lapacke/extconf.rb
================================================
# = NMatrix
#
# A linear algebra library for scientific computation in Ruby.
# NMatrix is part of SciRuby.
#
# NMatrix was originally inspired by and derived from NArray, by
# Masahiro Tanaka: http://narray.rubyforge.org
#
# == Copyright Information
#
# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
#
# Please see LICENSE.txt for additional copyright notices.
#
# == Contributing
#
# By contributing source code to SciRuby, you agree to be bound by
# our Contributor Agreement:
#
# * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
#
# == extconf.rb
#
# This file checks for ATLAS and other necessary headers, and
# generates a Makefile for compiling NMatrix.
require "nmatrix/mkmf"

#$INSTALLFILES = [['nmatrix.h', '$(archdir)'], ['nmatrix.hpp', '$(archdir)'], ['nmatrix_config.h', '$(archdir)'], ['nm_memory.h', '$(archdir)']]
if /cygwin|mingw/ =~ RUBY_PLATFORM
  #$INSTALLFILES << ['libnmatrix.a', '$(archdir)']
end

$DEBUG = true

#not the right way to add this include directory
$CFLAGS = ["-Wall -Werror=return-type -I$(srcdir)/../nmatrix -I$(srcdir)/lapacke/include",$CFLAGS].join(" ")
$CXXFLAGS = ["-Wall -Werror=return-type -I$(srcdir)/../nmatrix -I$(srcdir)/lapacke/include",$CXXFLAGS].join(" ")
$CPPFLAGS = ["-Wall -Werror=return-type -I$(srcdir)/../nmatrix -I$(srcdir)/lapacke/include",$CPPFLAGS].join(" ")

# When adding objects here, make sure their directories are included in CLEANOBJS down at the bottom of extconf.rb.
# Why not just autogenerate this list from all .c/.cpp files in directory?
basenames = %w{nmatrix_lapacke math_lapacke lapacke}
$objs = basenames.map { |b| "#{b}.o"   }
$srcs = basenames.map { |b| "#{b}.cpp" }

# For some reason, if we try to look for /usr/lib64/atlas on a Mac OS X Mavericks system, and the directory does not
# exist, it will give a linker error -- even if the lib dir is already correctly included with -L. So we need to check
# that Dir.exist?(d) for each.
# BUGFIX: Dir.exists? was deprecated in Ruby 2.1 and removed in Ruby 3.2;
# Dir.exist? is the supported spelling.
ldefaults = {lapack: ["/usr/local/lib"].delete_if { |d| !Dir.exist?(d) } }

# It is not clear how this variable should be defined, or if it is necessary at all.
# See issue https://github.com/SciRuby/nmatrix/issues/403
idefaults = {lapack: [] }

unless have_library("lapack")
  dir_config("lapack", idefaults[:lapack], ldefaults[:lapack])
end

# Order matters here: ATLAS has to go after LAPACK: http://mail.scipy.org/pipermail/scipy-user/2007-January/010717.html
$libs += " -llapack "

#To use the Intel MKL, comment out the line above, and also comment out the bit above with have_library and dir_config for lapack.
#Then add something like the line below (for exactly what linker flags to use see https://software.intel.com/en-us/articles/intel-mkl-link-line-advisor ): #$libs += " -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_sequential " create_conf_h("nmatrix_lapacke_config.h") create_makefile("nmatrix_lapacke") # to clean up object files in subdirectories: open('Makefile', 'a') do |f| clean_objs_paths = %w{ }.map { |d| "#{d}/*.#{CONFIG["OBJEXT"]}" } f.write("CLEANOBJS := $(CLEANOBJS) #{clean_objs_paths.join(' ')}") end ================================================ FILE: ext/nmatrix_lapacke/lapacke/include/lapacke.h ================================================ /***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK * Author: Intel Corporation * Generated November, 2011 *****************************************************************************/ #ifndef _LAPACKE_H_ #define _LAPACKE_H_ /* * Turn on HAVE_LAPACK_CONFIG_H to redefine C-LAPACK datatypes */ #ifdef HAVE_LAPACK_CONFIG_H #include "lapacke_config.h" #endif #include #ifndef lapack_int #define lapack_int int #endif #ifndef lapack_logical #define lapack_logical lapack_int #endif /* Complex types are structures equivalent to the * Fortran complex types COMPLEX(4) and COMPLEX(8). 
* * One can also redefine the types with his own types * for example by including in the code definitions like * * #define lapack_complex_float std::complex * #define lapack_complex_double std::complex * * or define these types in the command line: * * -Dlapack_complex_float="std::complex" * -Dlapack_complex_double="std::complex" */ #ifndef LAPACK_COMPLEX_CUSTOM /* Complex type (single precision) */ #ifndef lapack_complex_float #include #define lapack_complex_float float _Complex #endif #ifndef lapack_complex_float_real #define lapack_complex_float_real(z) (creal(z)) #endif #ifndef lapack_complex_float_imag #define lapack_complex_float_imag(z) (cimag(z)) #endif lapack_complex_float lapack_make_complex_float( float re, float im ); /* Complex type (double precision) */ #ifndef lapack_complex_double #include #define lapack_complex_double double _Complex #endif #ifndef lapack_complex_double_real #define lapack_complex_double_real(z) (creal(z)) #endif #ifndef lapack_complex_double_imag #define lapack_complex_double_imag(z) (cimag(z)) #endif lapack_complex_double lapack_make_complex_double( double re, double im ); #endif #ifdef __cplusplus extern "C" { #endif /* __cplusplus */ #ifndef LAPACKE_malloc #define LAPACKE_malloc( size ) malloc( size ) #endif #ifndef LAPACKE_free #define LAPACKE_free( p ) free( p ) #endif #define LAPACK_C2INT( x ) (lapack_int)(*((float*)&x )) #define LAPACK_Z2INT( x ) (lapack_int)(*((double*)&x )) #define LAPACK_ROW_MAJOR 101 #define LAPACK_COL_MAJOR 102 #define LAPACK_WORK_MEMORY_ERROR -1010 #define LAPACK_TRANSPOSE_MEMORY_ERROR -1011 /* Callback logical functions of one, two, or three arguments are used * to select eigenvalues to sort to the top left of the Schur form. * The value is selected if function returns TRUE (non-zero). 
*/ typedef lapack_logical (*LAPACK_S_SELECT2) ( const float*, const float* ); typedef lapack_logical (*LAPACK_S_SELECT3) ( const float*, const float*, const float* ); typedef lapack_logical (*LAPACK_D_SELECT2) ( const double*, const double* ); typedef lapack_logical (*LAPACK_D_SELECT3) ( const double*, const double*, const double* ); typedef lapack_logical (*LAPACK_C_SELECT1) ( const lapack_complex_float* ); typedef lapack_logical (*LAPACK_C_SELECT2) ( const lapack_complex_float*, const lapack_complex_float* ); typedef lapack_logical (*LAPACK_Z_SELECT1) ( const lapack_complex_double* ); typedef lapack_logical (*LAPACK_Z_SELECT2) ( const lapack_complex_double*, const lapack_complex_double* ); #include "lapacke_mangling.h" #define LAPACK_lsame LAPACK_GLOBAL(lsame,LSAME) lapack_logical LAPACK_lsame( char* ca, char* cb, lapack_int lca, lapack_int lcb ); /* C-LAPACK function prototypes */ lapack_int LAPACKE_sbdsdc( int matrix_order, char uplo, char compq, lapack_int n, float* d, float* e, float* u, lapack_int ldu, float* vt, lapack_int ldvt, float* q, lapack_int* iq ); lapack_int LAPACKE_dbdsdc( int matrix_order, char uplo, char compq, lapack_int n, double* d, double* e, double* u, lapack_int ldu, double* vt, lapack_int ldvt, double* q, lapack_int* iq ); lapack_int LAPACKE_sbdsqr( int matrix_order, char uplo, lapack_int n, lapack_int ncvt, lapack_int nru, lapack_int ncc, float* d, float* e, float* vt, lapack_int ldvt, float* u, lapack_int ldu, float* c, lapack_int ldc ); lapack_int LAPACKE_dbdsqr( int matrix_order, char uplo, lapack_int n, lapack_int ncvt, lapack_int nru, lapack_int ncc, double* d, double* e, double* vt, lapack_int ldvt, double* u, lapack_int ldu, double* c, lapack_int ldc ); lapack_int LAPACKE_cbdsqr( int matrix_order, char uplo, lapack_int n, lapack_int ncvt, lapack_int nru, lapack_int ncc, float* d, float* e, lapack_complex_float* vt, lapack_int ldvt, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* c, lapack_int ldc ); lapack_int 
LAPACKE_zbdsqr( int matrix_order, char uplo, lapack_int n, lapack_int ncvt, lapack_int nru, lapack_int ncc, double* d, double* e, lapack_complex_double* vt, lapack_int ldvt, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_sdisna( char job, lapack_int m, lapack_int n, const float* d, float* sep ); lapack_int LAPACKE_ddisna( char job, lapack_int m, lapack_int n, const double* d, double* sep ); lapack_int LAPACKE_sgbbrd( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int ncc, lapack_int kl, lapack_int ku, float* ab, lapack_int ldab, float* d, float* e, float* q, lapack_int ldq, float* pt, lapack_int ldpt, float* c, lapack_int ldc ); lapack_int LAPACKE_dgbbrd( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int ncc, lapack_int kl, lapack_int ku, double* ab, lapack_int ldab, double* d, double* e, double* q, lapack_int ldq, double* pt, lapack_int ldpt, double* c, lapack_int ldc ); lapack_int LAPACKE_cgbbrd( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int ncc, lapack_int kl, lapack_int ku, lapack_complex_float* ab, lapack_int ldab, float* d, float* e, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* pt, lapack_int ldpt, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zgbbrd( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int ncc, lapack_int kl, lapack_int ku, lapack_complex_double* ab, lapack_int ldab, double* d, double* e, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* pt, lapack_int ldpt, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_sgbcon( int matrix_order, char norm, lapack_int n, lapack_int kl, lapack_int ku, const float* ab, lapack_int ldab, const lapack_int* ipiv, float anorm, float* rcond ); lapack_int LAPACKE_dgbcon( int matrix_order, char norm, lapack_int n, lapack_int kl, lapack_int ku, const double* ab, lapack_int ldab, const lapack_int* ipiv, double anorm, double* rcond 
); lapack_int LAPACKE_cgbcon( int matrix_order, char norm, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_float* ab, lapack_int ldab, const lapack_int* ipiv, float anorm, float* rcond ); lapack_int LAPACKE_zgbcon( int matrix_order, char norm, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_double* ab, lapack_int ldab, const lapack_int* ipiv, double anorm, double* rcond ); lapack_int LAPACKE_sgbequ( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const float* ab, lapack_int ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_dgbequ( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const double* ab, lapack_int ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_cgbequ( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_float* ab, lapack_int ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_zgbequ( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_double* ab, lapack_int ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_sgbequb( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const float* ab, lapack_int ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_dgbequb( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const double* ab, lapack_int ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_cgbequb( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_float* ab, lapack_int ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_zgbequb( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const 
lapack_complex_double* ab, lapack_int ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_sgbrfs( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const float* ab, lapack_int ldab, const float* afb, lapack_int ldafb, const lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_dgbrfs( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const double* ab, lapack_int ldab, const double* afb, lapack_int ldafb, const lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_cgbrfs( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, const lapack_complex_float* afb, lapack_int ldafb, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_zgbrfs( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, const lapack_complex_double* afb, lapack_int ldafb, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_sgbrfsx( int matrix_order, char trans, char equed, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const float* ab, lapack_int ldab, const float* afb, lapack_int ldafb, const lapack_int* ipiv, const float* r, const float* c, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_dgbrfsx( int matrix_order, char trans, char equed, lapack_int n, lapack_int kl, lapack_int ku, 
lapack_int nrhs, const double* ab, lapack_int ldab, const double* afb, lapack_int ldafb, const lapack_int* ipiv, const double* r, const double* c, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_cgbrfsx( int matrix_order, char trans, char equed, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, const lapack_complex_float* afb, lapack_int ldafb, const lapack_int* ipiv, const float* r, const float* c, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_zgbrfsx( int matrix_order, char trans, char equed, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, const lapack_complex_double* afb, lapack_int ldafb, const lapack_int* ipiv, const double* r, const double* c, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_sgbsv( int matrix_order, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, float* ab, lapack_int ldab, lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dgbsv( int matrix_order, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, double* ab, lapack_int ldab, lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_cgbsv( int matrix_order, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_float* ab, lapack_int ldab, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgbsv( int matrix_order, lapack_int n, 
lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_double* ab, lapack_int ldab, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sgbsvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, float* ab, lapack_int ldab, float* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* rpivot ); lapack_int LAPACKE_dgbsvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, double* ab, lapack_int ldab, double* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* rpivot ); lapack_int LAPACKE_cgbsvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* rpivot ); lapack_int LAPACKE_zgbsvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* rpivot ); lapack_int LAPACKE_sgbsvxx( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, float* ab, lapack_int ldab, float* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* 
rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_dgbsvxx( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, double* ab, lapack_int ldab, double* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_cgbsvxx( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_zgbsvxx( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_sgbtrf( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, float* ab, lapack_int ldab, lapack_int* ipiv ); lapack_int LAPACKE_dgbtrf( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, double* ab, lapack_int ldab, lapack_int* ipiv ); lapack_int LAPACKE_cgbtrf( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, 
lapack_complex_float* ab, lapack_int ldab, lapack_int* ipiv ); lapack_int LAPACKE_zgbtrf( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, lapack_complex_double* ab, lapack_int ldab, lapack_int* ipiv ); lapack_int LAPACKE_sgbtrs( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const float* ab, lapack_int ldab, const lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dgbtrs( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const double* ab, lapack_int ldab, const lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_cgbtrs( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgbtrs( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sgebak( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const float* scale, lapack_int m, float* v, lapack_int ldv ); lapack_int LAPACKE_dgebak( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const double* scale, lapack_int m, double* v, lapack_int ldv ); lapack_int LAPACKE_cgebak( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const float* scale, lapack_int m, lapack_complex_float* v, lapack_int ldv ); lapack_int LAPACKE_zgebak( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const double* scale, lapack_int m, lapack_complex_double* v, lapack_int ldv ); lapack_int LAPACKE_sgebal( int matrix_order, char job, lapack_int n, float* a, lapack_int lda, lapack_int* ilo, lapack_int* ihi, float* scale ); lapack_int 
LAPACKE_dgebal( int matrix_order, char job, lapack_int n, double* a, lapack_int lda, lapack_int* ilo, lapack_int* ihi, double* scale ); lapack_int LAPACKE_cgebal( int matrix_order, char job, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* ilo, lapack_int* ihi, float* scale ); lapack_int LAPACKE_zgebal( int matrix_order, char job, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* ilo, lapack_int* ihi, double* scale ); lapack_int LAPACKE_sgebrd( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* d, float* e, float* tauq, float* taup ); lapack_int LAPACKE_dgebrd( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* d, double* e, double* tauq, double* taup ); lapack_int LAPACKE_cgebrd( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, float* d, float* e, lapack_complex_float* tauq, lapack_complex_float* taup ); lapack_int LAPACKE_zgebrd( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, double* d, double* e, lapack_complex_double* tauq, lapack_complex_double* taup ); lapack_int LAPACKE_sgecon( int matrix_order, char norm, lapack_int n, const float* a, lapack_int lda, float anorm, float* rcond ); lapack_int LAPACKE_dgecon( int matrix_order, char norm, lapack_int n, const double* a, lapack_int lda, double anorm, double* rcond ); lapack_int LAPACKE_cgecon( int matrix_order, char norm, lapack_int n, const lapack_complex_float* a, lapack_int lda, float anorm, float* rcond ); lapack_int LAPACKE_zgecon( int matrix_order, char norm, lapack_int n, const lapack_complex_double* a, lapack_int lda, double anorm, double* rcond ); lapack_int LAPACKE_sgeequ( int matrix_order, lapack_int m, lapack_int n, const float* a, lapack_int lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_dgeequ( int matrix_order, lapack_int m, lapack_int n, const double* a, lapack_int lda, double* r, 
double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_cgeequ( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_zgeequ( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_sgeequb( int matrix_order, lapack_int m, lapack_int n, const float* a, lapack_int lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_dgeequb( int matrix_order, lapack_int m, lapack_int n, const double* a, lapack_int lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_cgeequb( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_zgeequb( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_sgees( int matrix_order, char jobvs, char sort, LAPACK_S_SELECT2 select, lapack_int n, float* a, lapack_int lda, lapack_int* sdim, float* wr, float* wi, float* vs, lapack_int ldvs ); lapack_int LAPACKE_dgees( int matrix_order, char jobvs, char sort, LAPACK_D_SELECT2 select, lapack_int n, double* a, lapack_int lda, lapack_int* sdim, double* wr, double* wi, double* vs, lapack_int ldvs ); lapack_int LAPACKE_cgees( int matrix_order, char jobvs, char sort, LAPACK_C_SELECT1 select, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* sdim, lapack_complex_float* w, lapack_complex_float* vs, lapack_int ldvs ); lapack_int LAPACKE_zgees( int matrix_order, char jobvs, char sort, LAPACK_Z_SELECT1 select, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* sdim, lapack_complex_double* w, 
lapack_complex_double* vs, lapack_int ldvs ); lapack_int LAPACKE_sgeesx( int matrix_order, char jobvs, char sort, LAPACK_S_SELECT2 select, char sense, lapack_int n, float* a, lapack_int lda, lapack_int* sdim, float* wr, float* wi, float* vs, lapack_int ldvs, float* rconde, float* rcondv ); lapack_int LAPACKE_dgeesx( int matrix_order, char jobvs, char sort, LAPACK_D_SELECT2 select, char sense, lapack_int n, double* a, lapack_int lda, lapack_int* sdim, double* wr, double* wi, double* vs, lapack_int ldvs, double* rconde, double* rcondv ); lapack_int LAPACKE_cgeesx( int matrix_order, char jobvs, char sort, LAPACK_C_SELECT1 select, char sense, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* sdim, lapack_complex_float* w, lapack_complex_float* vs, lapack_int ldvs, float* rconde, float* rcondv ); lapack_int LAPACKE_zgeesx( int matrix_order, char jobvs, char sort, LAPACK_Z_SELECT1 select, char sense, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* sdim, lapack_complex_double* w, lapack_complex_double* vs, lapack_int ldvs, double* rconde, double* rcondv ); lapack_int LAPACKE_sgeev( int matrix_order, char jobvl, char jobvr, lapack_int n, float* a, lapack_int lda, float* wr, float* wi, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr ); lapack_int LAPACKE_dgeev( int matrix_order, char jobvl, char jobvr, lapack_int n, double* a, lapack_int lda, double* wr, double* wi, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr ); lapack_int LAPACKE_cgeev( int matrix_order, char jobvl, char jobvr, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* w, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr ); lapack_int LAPACKE_zgeev( int matrix_order, char jobvl, char jobvr, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* w, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr ); lapack_int LAPACKE_sgeevx( int 
matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, float* a, lapack_int lda, float* wr, float* wi, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, float* scale, float* abnrm, float* rconde, float* rcondv ); lapack_int LAPACKE_dgeevx( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, double* a, lapack_int lda, double* wr, double* wi, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, double* scale, double* abnrm, double* rconde, double* rcondv ); lapack_int LAPACKE_cgeevx( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* w, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, float* scale, float* abnrm, float* rconde, float* rcondv ); lapack_int LAPACKE_zgeevx( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* w, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, double* scale, double* abnrm, double* rconde, double* rcondv ); lapack_int LAPACKE_sgehrd( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, float* a, lapack_int lda, float* tau ); lapack_int LAPACKE_dgehrd( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, double* a, lapack_int lda, double* tau ); lapack_int LAPACKE_cgehrd( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau ); lapack_int LAPACKE_zgehrd( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau ); lapack_int LAPACKE_sgejsv( int matrix_order, char joba, char jobu, char jobv, char jobr, char jobt, char 
jobp, lapack_int m, lapack_int n, float* a, lapack_int lda, float* sva, float* u, lapack_int ldu, float* v, lapack_int ldv, float* stat, lapack_int* istat ); lapack_int LAPACKE_dgejsv( int matrix_order, char joba, char jobu, char jobv, char jobr, char jobt, char jobp, lapack_int m, lapack_int n, double* a, lapack_int lda, double* sva, double* u, lapack_int ldu, double* v, lapack_int ldv, double* stat, lapack_int* istat ); lapack_int LAPACKE_sgelq2( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau ); lapack_int LAPACKE_dgelq2( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau ); lapack_int LAPACKE_cgelq2( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau ); lapack_int LAPACKE_zgelq2( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau ); lapack_int LAPACKE_sgelqf( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau ); lapack_int LAPACKE_dgelqf( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau ); lapack_int LAPACKE_cgelqf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau ); lapack_int LAPACKE_zgelqf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau ); lapack_int LAPACKE_sgels( int matrix_order, char trans, lapack_int m, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb ); lapack_int LAPACKE_dgels( int matrix_order, char trans, lapack_int m, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb ); lapack_int LAPACKE_cgels( int matrix_order, char trans, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgels( int matrix_order, 
char trans, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sgelsd( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb, float* s, float rcond, lapack_int* rank ); lapack_int LAPACKE_dgelsd( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb, double* s, double rcond, lapack_int* rank ); lapack_int LAPACKE_cgelsd( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float* s, float rcond, lapack_int* rank ); lapack_int LAPACKE_zgelsd( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double* s, double rcond, lapack_int* rank ); lapack_int LAPACKE_sgelss( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb, float* s, float rcond, lapack_int* rank ); lapack_int LAPACKE_dgelss( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb, double* s, double rcond, lapack_int* rank ); lapack_int LAPACKE_cgelss( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float* s, float rcond, lapack_int* rank ); lapack_int LAPACKE_zgelss( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double* s, double rcond, lapack_int* rank ); lapack_int LAPACKE_sgelsy( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb, lapack_int* jpvt, float rcond, lapack_int* rank ); lapack_int LAPACKE_dgelsy( int matrix_order, 
lapack_int m, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb, lapack_int* jpvt, double rcond, lapack_int* rank ); lapack_int LAPACKE_cgelsy( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_int* jpvt, float rcond, lapack_int* rank ); lapack_int LAPACKE_zgelsy( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_int* jpvt, double rcond, lapack_int* rank ); lapack_int LAPACKE_sgeqlf( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau ); lapack_int LAPACKE_dgeqlf( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau ); lapack_int LAPACKE_cgeqlf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau ); lapack_int LAPACKE_zgeqlf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau ); lapack_int LAPACKE_sgeqp3( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, lapack_int* jpvt, float* tau ); lapack_int LAPACKE_dgeqp3( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, lapack_int* jpvt, double* tau ); lapack_int LAPACKE_cgeqp3( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* jpvt, lapack_complex_float* tau ); lapack_int LAPACKE_zgeqp3( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* jpvt, lapack_complex_double* tau ); lapack_int LAPACKE_sgeqpf( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, lapack_int* jpvt, float* tau ); lapack_int LAPACKE_dgeqpf( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, lapack_int* jpvt, double* tau ); lapack_int LAPACKE_cgeqpf( int 
matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* jpvt, lapack_complex_float* tau ); lapack_int LAPACKE_zgeqpf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* jpvt, lapack_complex_double* tau ); lapack_int LAPACKE_sgeqr2( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau ); lapack_int LAPACKE_dgeqr2( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau ); lapack_int LAPACKE_cgeqr2( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau ); lapack_int LAPACKE_zgeqr2( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau ); lapack_int LAPACKE_sgeqrf( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau ); lapack_int LAPACKE_dgeqrf( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau ); lapack_int LAPACKE_cgeqrf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau ); lapack_int LAPACKE_zgeqrf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau ); lapack_int LAPACKE_sgeqrfp( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau ); lapack_int LAPACKE_dgeqrfp( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau ); lapack_int LAPACKE_cgeqrfp( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau ); lapack_int LAPACKE_zgeqrfp( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau ); lapack_int LAPACKE_sgerfs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, 
const lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_dgerfs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_cgerfs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_zgerfs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_sgerfsx( int matrix_order, char trans, char equed, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const lapack_int* ipiv, const float* r, const float* c, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_dgerfsx( int matrix_order, char trans, char equed, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const lapack_int* ipiv, const double* r, const double* c, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_cgerfsx( int matrix_order, char trans, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_float* 
a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const float* r, const float* c, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_zgerfsx( int matrix_order, char trans, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const double* r, const double* c, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_sgerqf( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau ); lapack_int LAPACKE_dgerqf( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau ); lapack_int LAPACKE_cgerqf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau ); lapack_int LAPACKE_zgerqf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau ); lapack_int LAPACKE_sgesdd( int matrix_order, char jobz, lapack_int m, lapack_int n, float* a, lapack_int lda, float* s, float* u, lapack_int ldu, float* vt, lapack_int ldvt ); lapack_int LAPACKE_dgesdd( int matrix_order, char jobz, lapack_int m, lapack_int n, double* a, lapack_int lda, double* s, double* u, lapack_int ldu, double* vt, lapack_int ldvt ); lapack_int LAPACKE_cgesdd( int matrix_order, char jobz, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* vt, lapack_int ldvt ); lapack_int LAPACKE_zgesdd( int matrix_order, char jobz, lapack_int m, lapack_int 
n, lapack_complex_double* a, lapack_int lda, double* s, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* vt, lapack_int ldvt ); lapack_int LAPACKE_sgesv( int matrix_order, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dgesv( int matrix_order, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_cgesv( int matrix_order, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgesv( int matrix_order, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_dsgesv( int matrix_order, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, lapack_int* ipiv, double* b, lapack_int ldb, double* x, lapack_int ldx, lapack_int* iter ); lapack_int LAPACKE_zcgesv( int matrix_order, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, lapack_int* iter ); lapack_int LAPACKE_sgesvd( int matrix_order, char jobu, char jobvt, lapack_int m, lapack_int n, float* a, lapack_int lda, float* s, float* u, lapack_int ldu, float* vt, lapack_int ldvt, float* superb ); lapack_int LAPACKE_dgesvd( int matrix_order, char jobu, char jobvt, lapack_int m, lapack_int n, double* a, lapack_int lda, double* s, double* u, lapack_int ldu, double* vt, lapack_int ldvt, double* superb ); lapack_int LAPACKE_cgesvd( int matrix_order, char jobu, char jobvt, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* vt, lapack_int ldvt, float* superb ); lapack_int LAPACKE_zgesvd( int matrix_order, char jobu, char jobvt, lapack_int m, lapack_int n, 
lapack_complex_double* a, lapack_int lda, double* s, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* vt, lapack_int ldvt, double* superb ); lapack_int LAPACKE_sgesvj( int matrix_order, char joba, char jobu, char jobv, lapack_int m, lapack_int n, float* a, lapack_int lda, float* sva, lapack_int mv, float* v, lapack_int ldv, float* stat ); lapack_int LAPACKE_dgesvj( int matrix_order, char joba, char jobu, char jobv, lapack_int m, lapack_int n, double* a, lapack_int lda, double* sva, lapack_int mv, double* v, lapack_int ldv, double* stat ); lapack_int LAPACKE_sgesvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* rpivot ); lapack_int LAPACKE_dgesvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* rpivot ); lapack_int LAPACKE_cgesvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* rpivot ); lapack_int LAPACKE_zgesvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* rpivot ); lapack_int LAPACKE_sgesvxx( int matrix_order, 
char fact, char trans, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_dgesvxx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_cgesvxx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_zgesvxx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_sgetf2( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_dgetf2( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_cgetf2( int matrix_order, 
lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_zgetf2( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_sgetrf( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_dgetrf( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_cgetrf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_zgetrf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_sgetri( int matrix_order, lapack_int n, float* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_dgetri( int matrix_order, lapack_int n, double* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_cgetri( int matrix_order, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_zgetri( int matrix_order, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_sgetrs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dgetrs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_cgetrs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgetrs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sggbak( int 
matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const float* lscale, const float* rscale, lapack_int m, float* v, lapack_int ldv ); lapack_int LAPACKE_dggbak( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const double* lscale, const double* rscale, lapack_int m, double* v, lapack_int ldv ); lapack_int LAPACKE_cggbak( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const float* lscale, const float* rscale, lapack_int m, lapack_complex_float* v, lapack_int ldv ); lapack_int LAPACKE_zggbak( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const double* lscale, const double* rscale, lapack_int m, lapack_complex_double* v, lapack_int ldv ); lapack_int LAPACKE_sggbal( int matrix_order, char job, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale ); lapack_int LAPACKE_dggbal( int matrix_order, char job, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale ); lapack_int LAPACKE_cggbal( int matrix_order, char job, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale ); lapack_int LAPACKE_zggbal( int matrix_order, char job, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale ); lapack_int LAPACKE_sgges( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_S_SELECT3 selctg, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, lapack_int* sdim, float* alphar, float* alphai, float* beta, float* vsl, lapack_int ldvsl, float* vsr, lapack_int ldvsr ); lapack_int LAPACKE_dgges( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_D_SELECT3 
selctg, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, lapack_int* sdim, double* alphar, double* alphai, double* beta, double* vsl, lapack_int ldvsl, double* vsr, lapack_int ldvsr ); lapack_int LAPACKE_cgges( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_C_SELECT2 selctg, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_int* sdim, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vsl, lapack_int ldvsl, lapack_complex_float* vsr, lapack_int ldvsr ); lapack_int LAPACKE_zgges( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_Z_SELECT2 selctg, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_int* sdim, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vsl, lapack_int ldvsl, lapack_complex_double* vsr, lapack_int ldvsr ); lapack_int LAPACKE_sggesx( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_S_SELECT3 selctg, char sense, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, lapack_int* sdim, float* alphar, float* alphai, float* beta, float* vsl, lapack_int ldvsl, float* vsr, lapack_int ldvsr, float* rconde, float* rcondv ); lapack_int LAPACKE_dggesx( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_D_SELECT3 selctg, char sense, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, lapack_int* sdim, double* alphar, double* alphai, double* beta, double* vsl, lapack_int ldvsl, double* vsr, lapack_int ldvsr, double* rconde, double* rcondv ); lapack_int LAPACKE_cggesx( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_C_SELECT2 selctg, char sense, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_int* sdim, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vsl, lapack_int ldvsl, lapack_complex_float* vsr, 
lapack_int ldvsr, float* rconde, float* rcondv ); lapack_int LAPACKE_zggesx( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_Z_SELECT2 selctg, char sense, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_int* sdim, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vsl, lapack_int ldvsl, lapack_complex_double* vsr, lapack_int ldvsr, double* rconde, double* rcondv ); lapack_int LAPACKE_sggev( int matrix_order, char jobvl, char jobvr, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* alphar, float* alphai, float* beta, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr ); lapack_int LAPACKE_dggev( int matrix_order, char jobvl, char jobvr, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* alphar, double* alphai, double* beta, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr ); lapack_int LAPACKE_cggev( int matrix_order, char jobvl, char jobvr, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr ); lapack_int LAPACKE_zggev( int matrix_order, char jobvl, char jobvr, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr ); lapack_int LAPACKE_sggevx( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* alphar, float* alphai, float* beta, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale, float* abnrm, float* bbnrm, float* rconde, float* rcondv ); lapack_int LAPACKE_dggevx( int matrix_order, 
char balanc, char jobvl, char jobvr, char sense, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* alphar, double* alphai, double* beta, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale, double* abnrm, double* bbnrm, double* rconde, double* rcondv ); lapack_int LAPACKE_cggevx( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale, float* abnrm, float* bbnrm, float* rconde, float* rcondv ); lapack_int LAPACKE_zggevx( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale, double* abnrm, double* bbnrm, double* rconde, double* rcondv ); lapack_int LAPACKE_sggglm( int matrix_order, lapack_int n, lapack_int m, lapack_int p, float* a, lapack_int lda, float* b, lapack_int ldb, float* d, float* x, float* y ); lapack_int LAPACKE_dggglm( int matrix_order, lapack_int n, lapack_int m, lapack_int p, double* a, lapack_int lda, double* b, lapack_int ldb, double* d, double* x, double* y ); lapack_int LAPACKE_cggglm( int matrix_order, lapack_int n, lapack_int m, lapack_int p, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* d, lapack_complex_float* x, lapack_complex_float* y ); lapack_int LAPACKE_zggglm( int matrix_order, lapack_int n, lapack_int m, lapack_int p, lapack_complex_double* a, lapack_int lda, 
lapack_complex_double* b, lapack_int ldb, lapack_complex_double* d, lapack_complex_double* x, lapack_complex_double* y ); lapack_int LAPACKE_sgghrd( int matrix_order, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, float* a, lapack_int lda, float* b, lapack_int ldb, float* q, lapack_int ldq, float* z, lapack_int ldz ); lapack_int LAPACKE_dgghrd( int matrix_order, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, double* a, lapack_int lda, double* b, lapack_int ldb, double* q, lapack_int ldq, double* z, lapack_int ldz ); lapack_int LAPACKE_cgghrd( int matrix_order, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zgghrd( int matrix_order, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_sgglse( int matrix_order, lapack_int m, lapack_int n, lapack_int p, float* a, lapack_int lda, float* b, lapack_int ldb, float* c, float* d, float* x ); lapack_int LAPACKE_dgglse( int matrix_order, lapack_int m, lapack_int n, lapack_int p, double* a, lapack_int lda, double* b, lapack_int ldb, double* c, double* d, double* x ); lapack_int LAPACKE_cgglse( int matrix_order, lapack_int m, lapack_int n, lapack_int p, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* c, lapack_complex_float* d, lapack_complex_float* x ); lapack_int LAPACKE_zgglse( int matrix_order, lapack_int m, lapack_int n, lapack_int p, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* c, lapack_complex_double* d, lapack_complex_double* x ); lapack_int 
LAPACKE_sggqrf( int matrix_order, lapack_int n, lapack_int m, lapack_int p, float* a, lapack_int lda, float* taua, float* b, lapack_int ldb, float* taub ); lapack_int LAPACKE_dggqrf( int matrix_order, lapack_int n, lapack_int m, lapack_int p, double* a, lapack_int lda, double* taua, double* b, lapack_int ldb, double* taub ); lapack_int LAPACKE_cggqrf( int matrix_order, lapack_int n, lapack_int m, lapack_int p, lapack_complex_float* a, lapack_int lda, lapack_complex_float* taua, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* taub ); lapack_int LAPACKE_zggqrf( int matrix_order, lapack_int n, lapack_int m, lapack_int p, lapack_complex_double* a, lapack_int lda, lapack_complex_double* taua, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* taub ); lapack_int LAPACKE_sggrqf( int matrix_order, lapack_int m, lapack_int p, lapack_int n, float* a, lapack_int lda, float* taua, float* b, lapack_int ldb, float* taub ); lapack_int LAPACKE_dggrqf( int matrix_order, lapack_int m, lapack_int p, lapack_int n, double* a, lapack_int lda, double* taua, double* b, lapack_int ldb, double* taub ); lapack_int LAPACKE_cggrqf( int matrix_order, lapack_int m, lapack_int p, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* taua, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* taub ); lapack_int LAPACKE_zggrqf( int matrix_order, lapack_int m, lapack_int p, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* taua, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* taub ); lapack_int LAPACKE_sggsvd( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int n, lapack_int p, lapack_int* k, lapack_int* l, float* a, lapack_int lda, float* b, lapack_int ldb, float* alpha, float* beta, float* u, lapack_int ldu, float* v, lapack_int ldv, float* q, lapack_int ldq, lapack_int* iwork ); lapack_int LAPACKE_dggsvd( int matrix_order, char jobu, char jobv, char jobq, lapack_int 
m, lapack_int n, lapack_int p, lapack_int* k, lapack_int* l, double* a, lapack_int lda, double* b, lapack_int ldb, double* alpha, double* beta, double* u, lapack_int ldu, double* v, lapack_int ldv, double* q, lapack_int ldq, lapack_int* iwork ); lapack_int LAPACKE_cggsvd( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int n, lapack_int p, lapack_int* k, lapack_int* l, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float* alpha, float* beta, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* v, lapack_int ldv, lapack_complex_float* q, lapack_int ldq, lapack_int* iwork ); lapack_int LAPACKE_zggsvd( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int n, lapack_int p, lapack_int* k, lapack_int* l, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double* alpha, double* beta, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* v, lapack_int ldv, lapack_complex_double* q, lapack_int ldq, lapack_int* iwork ); lapack_int LAPACKE_sggsvp( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float tola, float tolb, lapack_int* k, lapack_int* l, float* u, lapack_int ldu, float* v, lapack_int ldv, float* q, lapack_int ldq ); lapack_int LAPACKE_dggsvp( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double tola, double tolb, lapack_int* k, lapack_int* l, double* u, lapack_int ldu, double* v, lapack_int ldv, double* q, lapack_int ldq ); lapack_int LAPACKE_cggsvp( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float tola, float tolb, lapack_int* k, lapack_int* l, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* v, 
lapack_int ldv, lapack_complex_float* q, lapack_int ldq ); lapack_int LAPACKE_zggsvp( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double tola, double tolb, lapack_int* k, lapack_int* l, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* v, lapack_int ldv, lapack_complex_double* q, lapack_int ldq ); lapack_int LAPACKE_sgtcon( char norm, lapack_int n, const float* dl, const float* d, const float* du, const float* du2, const lapack_int* ipiv, float anorm, float* rcond ); lapack_int LAPACKE_dgtcon( char norm, lapack_int n, const double* dl, const double* d, const double* du, const double* du2, const lapack_int* ipiv, double anorm, double* rcond ); lapack_int LAPACKE_cgtcon( char norm, lapack_int n, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, const lapack_complex_float* du2, const lapack_int* ipiv, float anorm, float* rcond ); lapack_int LAPACKE_zgtcon( char norm, lapack_int n, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, const lapack_complex_double* du2, const lapack_int* ipiv, double anorm, double* rcond ); lapack_int LAPACKE_sgtrfs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const float* dl, const float* d, const float* du, const float* dlf, const float* df, const float* duf, const float* du2, const lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_dgtrfs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const double* dl, const double* d, const double* du, const double* dlf, const double* df, const double* duf, const double* du2, const lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_cgtrfs( int matrix_order, char trans, lapack_int n, 
lapack_int nrhs, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, const lapack_complex_float* dlf, const lapack_complex_float* df, const lapack_complex_float* duf, const lapack_complex_float* du2, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_zgtrfs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, const lapack_complex_double* dlf, const lapack_complex_double* df, const lapack_complex_double* duf, const lapack_complex_double* du2, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_sgtsv( int matrix_order, lapack_int n, lapack_int nrhs, float* dl, float* d, float* du, float* b, lapack_int ldb ); lapack_int LAPACKE_dgtsv( int matrix_order, lapack_int n, lapack_int nrhs, double* dl, double* d, double* du, double* b, lapack_int ldb ); lapack_int LAPACKE_cgtsv( int matrix_order, lapack_int n, lapack_int nrhs, lapack_complex_float* dl, lapack_complex_float* d, lapack_complex_float* du, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgtsv( int matrix_order, lapack_int n, lapack_int nrhs, lapack_complex_double* dl, lapack_complex_double* d, lapack_complex_double* du, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sgtsvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, const float* dl, const float* d, const float* du, float* dlf, float* df, float* duf, float* du2, lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_dgtsvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, const double* dl, const double* d, const double* du, double* dlf, 
double* df, double* duf, double* du2, lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_cgtsvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, lapack_complex_float* dlf, lapack_complex_float* df, lapack_complex_float* duf, lapack_complex_float* du2, lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_zgtsvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, lapack_complex_double* dlf, lapack_complex_double* df, lapack_complex_double* duf, lapack_complex_double* du2, lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_sgttrf( lapack_int n, float* dl, float* d, float* du, float* du2, lapack_int* ipiv ); lapack_int LAPACKE_dgttrf( lapack_int n, double* dl, double* d, double* du, double* du2, lapack_int* ipiv ); lapack_int LAPACKE_cgttrf( lapack_int n, lapack_complex_float* dl, lapack_complex_float* d, lapack_complex_float* du, lapack_complex_float* du2, lapack_int* ipiv ); lapack_int LAPACKE_zgttrf( lapack_int n, lapack_complex_double* dl, lapack_complex_double* d, lapack_complex_double* du, lapack_complex_double* du2, lapack_int* ipiv ); lapack_int LAPACKE_sgttrs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const float* dl, const float* d, const float* du, const float* du2, const lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dgttrs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const double* dl, const double* d, const double* du, const double* du2, const 
lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_cgttrs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, const lapack_complex_float* du2, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgttrs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, const lapack_complex_double* du2, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_chbev( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, lapack_complex_float* ab, lapack_int ldab, float* w, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zhbev( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, lapack_complex_double* ab, lapack_int ldab, double* w, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_chbevd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, lapack_complex_float* ab, lapack_int ldab, float* w, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zhbevd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, lapack_complex_double* ab, lapack_int ldab, double* w, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_chbevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int kd, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* q, lapack_int ldq, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_zhbevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int kd, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* q, lapack_int ldq, double vl, double vu, lapack_int il, lapack_int iu, double abstol, 
lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_chbgst( int matrix_order, char vect, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_float* ab, lapack_int ldab, const lapack_complex_float* bb, lapack_int ldbb, lapack_complex_float* x, lapack_int ldx ); lapack_int LAPACKE_zhbgst( int matrix_order, char vect, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_double* ab, lapack_int ldab, const lapack_complex_double* bb, lapack_int ldbb, lapack_complex_double* x, lapack_int ldx ); lapack_int LAPACKE_chbgv( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* bb, lapack_int ldbb, float* w, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zhbgv( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* bb, lapack_int ldbb, double* w, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_chbgvd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* bb, lapack_int ldbb, float* w, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zhbgvd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* bb, lapack_int ldbb, double* w, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_chbgvx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* bb, lapack_int ldbb, lapack_complex_float* q, lapack_int ldq, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int* ifail ); 
lapack_int LAPACKE_zhbgvx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* bb, lapack_int ldbb, lapack_complex_double* q, lapack_int ldq, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_chbtrd( int matrix_order, char vect, char uplo, lapack_int n, lapack_int kd, lapack_complex_float* ab, lapack_int ldab, float* d, float* e, lapack_complex_float* q, lapack_int ldq ); lapack_int LAPACKE_zhbtrd( int matrix_order, char vect, char uplo, lapack_int n, lapack_int kd, lapack_complex_double* ab, lapack_int ldab, double* d, double* e, lapack_complex_double* q, lapack_int ldq ); lapack_int LAPACKE_checon( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, float anorm, float* rcond ); lapack_int LAPACKE_zhecon( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, double anorm, double* rcond ); lapack_int LAPACKE_cheequb( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* s, float* scond, float* amax ); lapack_int LAPACKE_zheequb( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* s, double* scond, double* amax ); lapack_int LAPACKE_cheev( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, float* w ); lapack_int LAPACKE_zheev( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, double* w ); lapack_int LAPACKE_cheevd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, float* w ); lapack_int LAPACKE_zheevd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_double* a, lapack_int 
lda, double* w ); lapack_int LAPACKE_cheevr( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int* isuppz ); lapack_int LAPACKE_zheevr( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int* isuppz ); lapack_int LAPACKE_cheevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_zheevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_chegst( int matrix_order, lapack_int itype, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zhegst( int matrix_order, lapack_int itype, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_chegv( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float* w ); lapack_int LAPACKE_zhegv( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double* w ); lapack_int LAPACKE_chegvd( int matrix_order, lapack_int itype, char jobz, char 
uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float* w ); lapack_int LAPACKE_zhegvd( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double* w ); lapack_int LAPACKE_chegvx( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_zhegvx( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_cherfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_zherfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_cherfsx( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const float* s, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, 
float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_zherfsx( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const double* s, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_chesv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zhesv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_chesvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_zhesvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_chesvxx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* s, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int 
n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_zhesvxx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_chetrd( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, float* d, float* e, lapack_complex_float* tau ); lapack_int LAPACKE_zhetrd( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, double* d, double* e, lapack_complex_double* tau ); lapack_int LAPACKE_chetrf( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_zhetrf( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_chetri( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_zhetri( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_chetrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zhetrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_chfrk( int matrix_order, char transr, char uplo, char trans, lapack_int n, lapack_int k, float alpha, const lapack_complex_float* a, lapack_int lda, float 
beta, lapack_complex_float* c ); lapack_int LAPACKE_zhfrk( int matrix_order, char transr, char uplo, char trans, lapack_int n, lapack_int k, double alpha, const lapack_complex_double* a, lapack_int lda, double beta, lapack_complex_double* c ); lapack_int LAPACKE_shgeqz( int matrix_order, char job, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, float* h, lapack_int ldh, float* t, lapack_int ldt, float* alphar, float* alphai, float* beta, float* q, lapack_int ldq, float* z, lapack_int ldz ); lapack_int LAPACKE_dhgeqz( int matrix_order, char job, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, double* h, lapack_int ldh, double* t, lapack_int ldt, double* alphar, double* alphai, double* beta, double* q, lapack_int ldq, double* z, lapack_int ldz ); lapack_int LAPACKE_chgeqz( int matrix_order, char job, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_float* h, lapack_int ldh, lapack_complex_float* t, lapack_int ldt, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zhgeqz( int matrix_order, char job, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_double* h, lapack_int ldh, lapack_complex_double* t, lapack_int ldt, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_chpcon( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, const lapack_int* ipiv, float anorm, float* rcond ); lapack_int LAPACKE_zhpcon( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, const lapack_int* ipiv, double anorm, double* rcond ); lapack_int LAPACKE_chpev( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_float* ap, float* w, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zhpev( 
int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_double* ap, double* w, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_chpevd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_float* ap, float* w, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zhpevd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_double* ap, double* w, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_chpevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_float* ap, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_zhpevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_double* ap, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_chpgst( int matrix_order, lapack_int itype, char uplo, lapack_int n, lapack_complex_float* ap, const lapack_complex_float* bp ); lapack_int LAPACKE_zhpgst( int matrix_order, lapack_int itype, char uplo, lapack_int n, lapack_complex_double* ap, const lapack_complex_double* bp ); lapack_int LAPACKE_chpgv( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_float* ap, lapack_complex_float* bp, float* w, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zhpgv( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_double* ap, lapack_complex_double* bp, double* w, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_chpgvd( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_float* ap, lapack_complex_float* bp, float* w, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zhpgvd( int matrix_order, lapack_int itype, char jobz, char 
uplo, lapack_int n, lapack_complex_double* ap, lapack_complex_double* bp, double* w, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_chpgvx( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, lapack_complex_float* ap, lapack_complex_float* bp, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_zhpgvx( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, lapack_complex_double* ap, lapack_complex_double* bp, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_chprfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_complex_float* afp, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_zhprfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_complex_double* afp, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_chpsv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* ap, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zhpsv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* ap, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_chpsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, lapack_complex_float* afp, lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* 
berr ); lapack_int LAPACKE_zhpsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, lapack_complex_double* afp, lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_chptrd( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap, float* d, float* e, lapack_complex_float* tau ); lapack_int LAPACKE_zhptrd( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap, double* d, double* e, lapack_complex_double* tau ); lapack_int LAPACKE_chptrf( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap, lapack_int* ipiv ); lapack_int LAPACKE_zhptrf( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap, lapack_int* ipiv ); lapack_int LAPACKE_chptri( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap, const lapack_int* ipiv ); lapack_int LAPACKE_zhptri( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap, const lapack_int* ipiv ); lapack_int LAPACKE_chptrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zhptrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_shsein( int matrix_order, char job, char eigsrc, char initv, lapack_logical* select, lapack_int n, const float* h, lapack_int ldh, float* wr, const float* wi, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, lapack_int* ifaill, lapack_int* ifailr ); lapack_int LAPACKE_dhsein( int matrix_order, char job, char eigsrc, char initv, lapack_logical* select, lapack_int n, const double* h, lapack_int ldh, double* wr, const double* wi, double* vl, lapack_int ldvl, double* vr, 
lapack_int ldvr, lapack_int mm, lapack_int* m, lapack_int* ifaill, lapack_int* ifailr ); lapack_int LAPACKE_chsein( int matrix_order, char job, char eigsrc, char initv, const lapack_logical* select, lapack_int n, const lapack_complex_float* h, lapack_int ldh, lapack_complex_float* w, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, lapack_int* ifaill, lapack_int* ifailr ); lapack_int LAPACKE_zhsein( int matrix_order, char job, char eigsrc, char initv, const lapack_logical* select, lapack_int n, const lapack_complex_double* h, lapack_int ldh, lapack_complex_double* w, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, lapack_int* ifaill, lapack_int* ifailr ); lapack_int LAPACKE_shseqr( int matrix_order, char job, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, float* h, lapack_int ldh, float* wr, float* wi, float* z, lapack_int ldz ); lapack_int LAPACKE_dhseqr( int matrix_order, char job, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, double* h, lapack_int ldh, double* wr, double* wi, double* z, lapack_int ldz ); lapack_int LAPACKE_chseqr( int matrix_order, char job, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_float* h, lapack_int ldh, lapack_complex_float* w, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zhseqr( int matrix_order, char job, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_double* h, lapack_int ldh, lapack_complex_double* w, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_clacgv( lapack_int n, lapack_complex_float* x, lapack_int incx ); lapack_int LAPACKE_zlacgv( lapack_int n, lapack_complex_double* x, lapack_int incx ); lapack_int LAPACKE_slacn2( lapack_int n, float* v, float* x, lapack_int* isgn, float* est, lapack_int* kase, lapack_int* isave ); lapack_int LAPACKE_dlacn2( lapack_int n, double* v, double* x, 
lapack_int* isgn, double* est, lapack_int* kase, lapack_int* isave ); lapack_int LAPACKE_clacn2( lapack_int n, lapack_complex_float* v, lapack_complex_float* x, float* est, lapack_int* kase, lapack_int* isave ); lapack_int LAPACKE_zlacn2( lapack_int n, lapack_complex_double* v, lapack_complex_double* x, double* est, lapack_int* kase, lapack_int* isave ); lapack_int LAPACKE_slacpy( int matrix_order, char uplo, lapack_int m, lapack_int n, const float* a, lapack_int lda, float* b, lapack_int ldb ); lapack_int LAPACKE_dlacpy( int matrix_order, char uplo, lapack_int m, lapack_int n, const double* a, lapack_int lda, double* b, lapack_int ldb ); lapack_int LAPACKE_clacpy( int matrix_order, char uplo, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zlacpy( int matrix_order, char uplo, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_clacp2( int matrix_order, char uplo, lapack_int m, lapack_int n, const float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zlacp2( int matrix_order, char uplo, lapack_int m, lapack_int n, const double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_zlag2c( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, lapack_complex_float* sa, lapack_int ldsa ); lapack_int LAPACKE_slag2d( int matrix_order, lapack_int m, lapack_int n, const float* sa, lapack_int ldsa, double* a, lapack_int lda ); lapack_int LAPACKE_dlag2s( int matrix_order, lapack_int m, lapack_int n, const double* a, lapack_int lda, float* sa, lapack_int ldsa ); lapack_int LAPACKE_clag2z( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float* sa, lapack_int ldsa, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_slagge( int matrix_order, lapack_int m, lapack_int n, 
lapack_int kl, lapack_int ku, const float* d, float* a, lapack_int lda, lapack_int* iseed ); lapack_int LAPACKE_dlagge( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const double* d, double* a, lapack_int lda, lapack_int* iseed ); lapack_int LAPACKE_clagge( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const float* d, lapack_complex_float* a, lapack_int lda, lapack_int* iseed ); lapack_int LAPACKE_zlagge( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const double* d, lapack_complex_double* a, lapack_int lda, lapack_int* iseed ); float LAPACKE_slamch( char cmach ); double LAPACKE_dlamch( char cmach ); float LAPACKE_slange( int matrix_order, char norm, lapack_int m, lapack_int n, const float* a, lapack_int lda ); double LAPACKE_dlange( int matrix_order, char norm, lapack_int m, lapack_int n, const double* a, lapack_int lda ); float LAPACKE_clange( int matrix_order, char norm, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda ); double LAPACKE_zlange( int matrix_order, char norm, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda ); float LAPACKE_clanhe( int matrix_order, char norm, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda ); double LAPACKE_zlanhe( int matrix_order, char norm, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda ); float LAPACKE_slansy( int matrix_order, char norm, char uplo, lapack_int n, const float* a, lapack_int lda ); double LAPACKE_dlansy( int matrix_order, char norm, char uplo, lapack_int n, const double* a, lapack_int lda ); float LAPACKE_clansy( int matrix_order, char norm, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda ); double LAPACKE_zlansy( int matrix_order, char norm, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda ); float LAPACKE_slantr( int matrix_order, char norm, char uplo, char diag, lapack_int m, 
lapack_int n, const float* a, lapack_int lda ); double LAPACKE_dlantr( int matrix_order, char norm, char uplo, char diag, lapack_int m, lapack_int n, const double* a, lapack_int lda ); float LAPACKE_clantr( int matrix_order, char norm, char uplo, char diag, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda ); double LAPACKE_zlantr( int matrix_order, char norm, char uplo, char diag, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_slarfb( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, const float* v, lapack_int ldv, const float* t, lapack_int ldt, float* c, lapack_int ldc ); lapack_int LAPACKE_dlarfb( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, const double* v, lapack_int ldv, const double* t, lapack_int ldt, double* c, lapack_int ldc ); lapack_int LAPACKE_clarfb( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* v, lapack_int ldv, const lapack_complex_float* t, lapack_int ldt, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zlarfb( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* v, lapack_int ldv, const lapack_complex_double* t, lapack_int ldt, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_slarfg( lapack_int n, float* alpha, float* x, lapack_int incx, float* tau ); lapack_int LAPACKE_dlarfg( lapack_int n, double* alpha, double* x, lapack_int incx, double* tau ); lapack_int LAPACKE_clarfg( lapack_int n, lapack_complex_float* alpha, lapack_complex_float* x, lapack_int incx, lapack_complex_float* tau ); lapack_int LAPACKE_zlarfg( lapack_int n, lapack_complex_double* alpha, lapack_complex_double* x, lapack_int incx, lapack_complex_double* tau ); lapack_int 
LAPACKE_slarft( int matrix_order, char direct, char storev, lapack_int n, lapack_int k, const float* v, lapack_int ldv, const float* tau, float* t, lapack_int ldt ); lapack_int LAPACKE_dlarft( int matrix_order, char direct, char storev, lapack_int n, lapack_int k, const double* v, lapack_int ldv, const double* tau, double* t, lapack_int ldt ); lapack_int LAPACKE_clarft( int matrix_order, char direct, char storev, lapack_int n, lapack_int k, const lapack_complex_float* v, lapack_int ldv, const lapack_complex_float* tau, lapack_complex_float* t, lapack_int ldt ); lapack_int LAPACKE_zlarft( int matrix_order, char direct, char storev, lapack_int n, lapack_int k, const lapack_complex_double* v, lapack_int ldv, const lapack_complex_double* tau, lapack_complex_double* t, lapack_int ldt ); lapack_int LAPACKE_slarfx( int matrix_order, char side, lapack_int m, lapack_int n, const float* v, float tau, float* c, lapack_int ldc, float* work ); lapack_int LAPACKE_dlarfx( int matrix_order, char side, lapack_int m, lapack_int n, const double* v, double tau, double* c, lapack_int ldc, double* work ); lapack_int LAPACKE_clarfx( int matrix_order, char side, lapack_int m, lapack_int n, const lapack_complex_float* v, lapack_complex_float tau, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work ); lapack_int LAPACKE_zlarfx( int matrix_order, char side, lapack_int m, lapack_int n, const lapack_complex_double* v, lapack_complex_double tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work ); lapack_int LAPACKE_slarnv( lapack_int idist, lapack_int* iseed, lapack_int n, float* x ); lapack_int LAPACKE_dlarnv( lapack_int idist, lapack_int* iseed, lapack_int n, double* x ); lapack_int LAPACKE_clarnv( lapack_int idist, lapack_int* iseed, lapack_int n, lapack_complex_float* x ); lapack_int LAPACKE_zlarnv( lapack_int idist, lapack_int* iseed, lapack_int n, lapack_complex_double* x ); lapack_int LAPACKE_slaset( int matrix_order, char uplo, lapack_int m, 
lapack_int n, float alpha, float beta, float* a, lapack_int lda ); lapack_int LAPACKE_dlaset( int matrix_order, char uplo, lapack_int m, lapack_int n, double alpha, double beta, double* a, lapack_int lda ); lapack_int LAPACKE_claset( int matrix_order, char uplo, lapack_int m, lapack_int n, lapack_complex_float alpha, lapack_complex_float beta, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zlaset( int matrix_order, char uplo, lapack_int m, lapack_int n, lapack_complex_double alpha, lapack_complex_double beta, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_slasrt( char id, lapack_int n, float* d ); lapack_int LAPACKE_dlasrt( char id, lapack_int n, double* d ); lapack_int LAPACKE_slaswp( int matrix_order, lapack_int n, float* a, lapack_int lda, lapack_int k1, lapack_int k2, const lapack_int* ipiv, lapack_int incx ); lapack_int LAPACKE_dlaswp( int matrix_order, lapack_int n, double* a, lapack_int lda, lapack_int k1, lapack_int k2, const lapack_int* ipiv, lapack_int incx ); lapack_int LAPACKE_claswp( int matrix_order, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int k1, lapack_int k2, const lapack_int* ipiv, lapack_int incx ); lapack_int LAPACKE_zlaswp( int matrix_order, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int k1, lapack_int k2, const lapack_int* ipiv, lapack_int incx ); lapack_int LAPACKE_slatms( int matrix_order, lapack_int m, lapack_int n, char dist, lapack_int* iseed, char sym, float* d, lapack_int mode, float cond, float dmax, lapack_int kl, lapack_int ku, char pack, float* a, lapack_int lda ); lapack_int LAPACKE_dlatms( int matrix_order, lapack_int m, lapack_int n, char dist, lapack_int* iseed, char sym, double* d, lapack_int mode, double cond, double dmax, lapack_int kl, lapack_int ku, char pack, double* a, lapack_int lda ); lapack_int LAPACKE_clatms( int matrix_order, lapack_int m, lapack_int n, char dist, lapack_int* iseed, char sym, float* d, lapack_int mode, float cond, float dmax, 
lapack_int kl, lapack_int ku, char pack, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zlatms( int matrix_order, lapack_int m, lapack_int n, char dist, lapack_int* iseed, char sym, double* d, lapack_int mode, double cond, double dmax, lapack_int kl, lapack_int ku, char pack, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_slauum( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda ); lapack_int LAPACKE_dlauum( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda ); lapack_int LAPACKE_clauum( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zlauum( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_sopgtr( int matrix_order, char uplo, lapack_int n, const float* ap, const float* tau, float* q, lapack_int ldq ); lapack_int LAPACKE_dopgtr( int matrix_order, char uplo, lapack_int n, const double* ap, const double* tau, double* q, lapack_int ldq ); lapack_int LAPACKE_sopmtr( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const float* ap, const float* tau, float* c, lapack_int ldc ); lapack_int LAPACKE_dopmtr( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const double* ap, const double* tau, double* c, lapack_int ldc ); lapack_int LAPACKE_sorgbr( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int k, float* a, lapack_int lda, const float* tau ); lapack_int LAPACKE_dorgbr( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int k, double* a, lapack_int lda, const double* tau ); lapack_int LAPACKE_sorghr( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, float* a, lapack_int lda, const float* tau ); lapack_int LAPACKE_dorghr( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, double* a, lapack_int lda, const double* tau ); lapack_int LAPACKE_sorglq( int matrix_order, lapack_int m, 
lapack_int n, lapack_int k, float* a, lapack_int lda, const float* tau ); lapack_int LAPACKE_dorglq( int matrix_order, lapack_int m, lapack_int n, lapack_int k, double* a, lapack_int lda, const double* tau ); lapack_int LAPACKE_sorgql( int matrix_order, lapack_int m, lapack_int n, lapack_int k, float* a, lapack_int lda, const float* tau ); lapack_int LAPACKE_dorgql( int matrix_order, lapack_int m, lapack_int n, lapack_int k, double* a, lapack_int lda, const double* tau ); lapack_int LAPACKE_sorgqr( int matrix_order, lapack_int m, lapack_int n, lapack_int k, float* a, lapack_int lda, const float* tau ); lapack_int LAPACKE_dorgqr( int matrix_order, lapack_int m, lapack_int n, lapack_int k, double* a, lapack_int lda, const double* tau ); lapack_int LAPACKE_sorgrq( int matrix_order, lapack_int m, lapack_int n, lapack_int k, float* a, lapack_int lda, const float* tau ); lapack_int LAPACKE_dorgrq( int matrix_order, lapack_int m, lapack_int n, lapack_int k, double* a, lapack_int lda, const double* tau ); lapack_int LAPACKE_sorgtr( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, const float* tau ); lapack_int LAPACKE_dorgtr( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, const double* tau ); lapack_int LAPACKE_sormbr( int matrix_order, char vect, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc ); lapack_int LAPACKE_dormbr( int matrix_order, char vect, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc ); lapack_int LAPACKE_sormhr( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int ilo, lapack_int ihi, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc ); lapack_int LAPACKE_dormhr( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int ilo, lapack_int ihi, const double* a, 
lapack_int lda, const double* tau, double* c, lapack_int ldc ); lapack_int LAPACKE_sormlq( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc ); lapack_int LAPACKE_dormlq( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc ); lapack_int LAPACKE_sormql( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc ); lapack_int LAPACKE_dormql( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc ); lapack_int LAPACKE_sormqr( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc ); lapack_int LAPACKE_dormqr( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc ); lapack_int LAPACKE_sormrq( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc ); lapack_int LAPACKE_dormrq( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc ); lapack_int LAPACKE_sormrz( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc ); lapack_int LAPACKE_dormrz( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc ); lapack_int 
LAPACKE_sormtr( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc ); lapack_int LAPACKE_dormtr( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc ); lapack_int LAPACKE_spbcon( int matrix_order, char uplo, lapack_int n, lapack_int kd, const float* ab, lapack_int ldab, float anorm, float* rcond ); lapack_int LAPACKE_dpbcon( int matrix_order, char uplo, lapack_int n, lapack_int kd, const double* ab, lapack_int ldab, double anorm, double* rcond ); lapack_int LAPACKE_cpbcon( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_float* ab, lapack_int ldab, float anorm, float* rcond ); lapack_int LAPACKE_zpbcon( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_double* ab, lapack_int ldab, double anorm, double* rcond ); lapack_int LAPACKE_spbequ( int matrix_order, char uplo, lapack_int n, lapack_int kd, const float* ab, lapack_int ldab, float* s, float* scond, float* amax ); lapack_int LAPACKE_dpbequ( int matrix_order, char uplo, lapack_int n, lapack_int kd, const double* ab, lapack_int ldab, double* s, double* scond, double* amax ); lapack_int LAPACKE_cpbequ( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_float* ab, lapack_int ldab, float* s, float* scond, float* amax ); lapack_int LAPACKE_zpbequ( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_double* ab, lapack_int ldab, double* s, double* scond, double* amax ); lapack_int LAPACKE_spbrfs( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const float* ab, lapack_int ldab, const float* afb, lapack_int ldafb, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_dpbrfs( int matrix_order, char uplo, lapack_int n, lapack_int 
kd, lapack_int nrhs, const double* ab, lapack_int ldab, const double* afb, lapack_int ldafb, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_cpbrfs( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, const lapack_complex_float* afb, lapack_int ldafb, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_zpbrfs( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, const lapack_complex_double* afb, lapack_int ldafb, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_spbstf( int matrix_order, char uplo, lapack_int n, lapack_int kb, float* bb, lapack_int ldbb ); lapack_int LAPACKE_dpbstf( int matrix_order, char uplo, lapack_int n, lapack_int kb, double* bb, lapack_int ldbb ); lapack_int LAPACKE_cpbstf( int matrix_order, char uplo, lapack_int n, lapack_int kb, lapack_complex_float* bb, lapack_int ldbb ); lapack_int LAPACKE_zpbstf( int matrix_order, char uplo, lapack_int n, lapack_int kb, lapack_complex_double* bb, lapack_int ldbb ); lapack_int LAPACKE_spbsv( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, float* ab, lapack_int ldab, float* b, lapack_int ldb ); lapack_int LAPACKE_dpbsv( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, double* ab, lapack_int ldab, double* b, lapack_int ldb ); lapack_int LAPACKE_cpbsv( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zpbsv( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* b, 
lapack_int ldb ); lapack_int LAPACKE_spbsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, float* ab, lapack_int ldab, float* afb, lapack_int ldafb, char* equed, float* s, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_dpbsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, double* ab, lapack_int ldab, double* afb, lapack_int ldafb, char* equed, double* s, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_cpbsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* afb, lapack_int ldafb, char* equed, float* s, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_zpbsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* afb, lapack_int ldafb, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_spbtrf( int matrix_order, char uplo, lapack_int n, lapack_int kd, float* ab, lapack_int ldab ); lapack_int LAPACKE_dpbtrf( int matrix_order, char uplo, lapack_int n, lapack_int kd, double* ab, lapack_int ldab ); lapack_int LAPACKE_cpbtrf( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_complex_float* ab, lapack_int ldab ); lapack_int LAPACKE_zpbtrf( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_complex_double* ab, lapack_int ldab ); lapack_int LAPACKE_spbtrs( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const float* ab, lapack_int ldab, float* b, lapack_int ldb ); lapack_int LAPACKE_dpbtrs( int 
matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const double* ab, lapack_int ldab, double* b, lapack_int ldb ); lapack_int LAPACKE_cpbtrs( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zpbtrs( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_spftrf( int matrix_order, char transr, char uplo, lapack_int n, float* a ); lapack_int LAPACKE_dpftrf( int matrix_order, char transr, char uplo, lapack_int n, double* a ); lapack_int LAPACKE_cpftrf( int matrix_order, char transr, char uplo, lapack_int n, lapack_complex_float* a ); lapack_int LAPACKE_zpftrf( int matrix_order, char transr, char uplo, lapack_int n, lapack_complex_double* a ); lapack_int LAPACKE_spftri( int matrix_order, char transr, char uplo, lapack_int n, float* a ); lapack_int LAPACKE_dpftri( int matrix_order, char transr, char uplo, lapack_int n, double* a ); lapack_int LAPACKE_cpftri( int matrix_order, char transr, char uplo, lapack_int n, lapack_complex_float* a ); lapack_int LAPACKE_zpftri( int matrix_order, char transr, char uplo, lapack_int n, lapack_complex_double* a ); lapack_int LAPACKE_spftrs( int matrix_order, char transr, char uplo, lapack_int n, lapack_int nrhs, const float* a, float* b, lapack_int ldb ); lapack_int LAPACKE_dpftrs( int matrix_order, char transr, char uplo, lapack_int n, lapack_int nrhs, const double* a, double* b, lapack_int ldb ); lapack_int LAPACKE_cpftrs( int matrix_order, char transr, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zpftrs( int matrix_order, char transr, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_complex_double* b, lapack_int ldb ); lapack_int 
LAPACKE_spocon( int matrix_order, char uplo, lapack_int n, const float* a, lapack_int lda, float anorm, float* rcond ); lapack_int LAPACKE_dpocon( int matrix_order, char uplo, lapack_int n, const double* a, lapack_int lda, double anorm, double* rcond ); lapack_int LAPACKE_cpocon( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, float anorm, float* rcond ); lapack_int LAPACKE_zpocon( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, double anorm, double* rcond ); lapack_int LAPACKE_spoequ( int matrix_order, lapack_int n, const float* a, lapack_int lda, float* s, float* scond, float* amax ); lapack_int LAPACKE_dpoequ( int matrix_order, lapack_int n, const double* a, lapack_int lda, double* s, double* scond, double* amax ); lapack_int LAPACKE_cpoequ( int matrix_order, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* s, float* scond, float* amax ); lapack_int LAPACKE_zpoequ( int matrix_order, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* s, double* scond, double* amax ); lapack_int LAPACKE_spoequb( int matrix_order, lapack_int n, const float* a, lapack_int lda, float* s, float* scond, float* amax ); lapack_int LAPACKE_dpoequb( int matrix_order, lapack_int n, const double* a, lapack_int lda, double* s, double* scond, double* amax ); lapack_int LAPACKE_cpoequb( int matrix_order, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* s, float* scond, float* amax ); lapack_int LAPACKE_zpoequb( int matrix_order, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* s, double* scond, double* amax ); lapack_int LAPACKE_sporfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_dporfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, 
const double* a, lapack_int lda, const double* af, lapack_int ldaf, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_cporfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_zporfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_sporfsx( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const float* s, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_dporfsx( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const double* s, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_cporfsx( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const float* s, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_zporfsx( int matrix_order, char 
uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const double* s, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_sposv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb ); lapack_int LAPACKE_dposv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb ); lapack_int LAPACKE_cposv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zposv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_dsposv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb, double* x, lapack_int ldx, lapack_int* iter ); lapack_int LAPACKE_zcposv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, lapack_int* iter ); lapack_int LAPACKE_sposvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* af, lapack_int ldaf, char* equed, float* s, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_dposvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* af, lapack_int ldaf, char* equed, double* s, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int 
LAPACKE_cposvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, char* equed, float* s, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_zposvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_sposvxx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* af, lapack_int ldaf, char* equed, float* s, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_dposvxx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* af, lapack_int ldaf, char* equed, double* s, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_cposvxx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, char* equed, float* s, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_zposvxx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, 
lapack_complex_double* af, lapack_int ldaf, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_spotrf( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda ); lapack_int LAPACKE_dpotrf( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda ); lapack_int LAPACKE_cpotrf( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zpotrf( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_spotri( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda ); lapack_int LAPACKE_dpotri( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda ); lapack_int LAPACKE_cpotri( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zpotri( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_spotrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, float* b, lapack_int ldb ); lapack_int LAPACKE_dpotrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, double* b, lapack_int ldb ); lapack_int LAPACKE_cpotrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zpotrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sppcon( int matrix_order, char uplo, lapack_int n, const float* ap, float anorm, float* rcond ); lapack_int LAPACKE_dppcon( int matrix_order, char uplo, lapack_int n, const 
double* ap, double anorm, double* rcond ); lapack_int LAPACKE_cppcon( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, float anorm, float* rcond ); lapack_int LAPACKE_zppcon( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, double anorm, double* rcond ); lapack_int LAPACKE_sppequ( int matrix_order, char uplo, lapack_int n, const float* ap, float* s, float* scond, float* amax ); lapack_int LAPACKE_dppequ( int matrix_order, char uplo, lapack_int n, const double* ap, double* s, double* scond, double* amax ); lapack_int LAPACKE_cppequ( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, float* s, float* scond, float* amax ); lapack_int LAPACKE_zppequ( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, double* s, double* scond, double* amax ); lapack_int LAPACKE_spprfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* ap, const float* afp, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_dpprfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* ap, const double* afp, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_cpprfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_complex_float* afp, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_zpprfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_complex_double* afp, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_sppsv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* ap, float* b, lapack_int ldb ); lapack_int LAPACKE_dppsv( int matrix_order, char uplo, 
lapack_int n, lapack_int nrhs, double* ap, double* b, lapack_int ldb ); lapack_int LAPACKE_cppsv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* ap, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zppsv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* ap, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sppsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, float* ap, float* afp, char* equed, float* s, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_dppsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, double* ap, double* afp, char* equed, double* s, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_cppsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* ap, lapack_complex_float* afp, char* equed, float* s, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_zppsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* ap, lapack_complex_double* afp, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_spptrf( int matrix_order, char uplo, lapack_int n, float* ap ); lapack_int LAPACKE_dpptrf( int matrix_order, char uplo, lapack_int n, double* ap ); lapack_int LAPACKE_cpptrf( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap ); lapack_int LAPACKE_zpptrf( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap ); lapack_int LAPACKE_spptri( int matrix_order, char uplo, lapack_int n, float* ap ); lapack_int LAPACKE_dpptri( int matrix_order, char uplo, lapack_int n, double* ap 
); lapack_int LAPACKE_cpptri( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap ); lapack_int LAPACKE_zpptri( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap ); lapack_int LAPACKE_spptrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* ap, float* b, lapack_int ldb ); lapack_int LAPACKE_dpptrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* ap, double* b, lapack_int ldb ); lapack_int LAPACKE_cpptrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zpptrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_spstrf( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, lapack_int* piv, lapack_int* rank, float tol ); lapack_int LAPACKE_dpstrf( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, lapack_int* piv, lapack_int* rank, double tol ); lapack_int LAPACKE_cpstrf( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* piv, lapack_int* rank, float tol ); lapack_int LAPACKE_zpstrf( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* piv, lapack_int* rank, double tol ); lapack_int LAPACKE_sptcon( lapack_int n, const float* d, const float* e, float anorm, float* rcond ); lapack_int LAPACKE_dptcon( lapack_int n, const double* d, const double* e, double anorm, double* rcond ); lapack_int LAPACKE_cptcon( lapack_int n, const float* d, const lapack_complex_float* e, float anorm, float* rcond ); lapack_int LAPACKE_zptcon( lapack_int n, const double* d, const lapack_complex_double* e, double anorm, double* rcond ); lapack_int LAPACKE_spteqr( int matrix_order, char compz, lapack_int n, float* d, float* e, float* z, lapack_int ldz ); lapack_int LAPACKE_dpteqr( int 
matrix_order, char compz, lapack_int n, double* d, double* e, double* z, lapack_int ldz );
lapack_int LAPACKE_cpteqr( int matrix_order, char compz, lapack_int n, float* d, float* e, lapack_complex_float* z, lapack_int ldz );
lapack_int LAPACKE_zpteqr( int matrix_order, char compz, lapack_int n, double* d, double* e, lapack_complex_double* z, lapack_int ldz );
/* ?ptrfs: iterative refinement and error bounds for PD tridiagonal systems. */
lapack_int LAPACKE_sptrfs( int matrix_order, lapack_int n, lapack_int nrhs, const float* d, const float* e, const float* df, const float* ef, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr );
lapack_int LAPACKE_dptrfs( int matrix_order, lapack_int n, lapack_int nrhs, const double* d, const double* e, const double* df, const double* ef, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr );
lapack_int LAPACKE_cptrfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* d, const lapack_complex_float* e, const float* df, const lapack_complex_float* ef, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr );
lapack_int LAPACKE_zptrfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* d, const lapack_complex_double* e, const double* df, const lapack_complex_double* ef, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr );
/* ?ptsv / ?ptsvx: simple / expert drivers for PD tridiagonal systems. */
lapack_int LAPACKE_sptsv( int matrix_order, lapack_int n, lapack_int nrhs, float* d, float* e, float* b, lapack_int ldb );
lapack_int LAPACKE_dptsv( int matrix_order, lapack_int n, lapack_int nrhs, double* d, double* e, double* b, lapack_int ldb );
lapack_int LAPACKE_cptsv( int matrix_order, lapack_int n, lapack_int nrhs, float* d, lapack_complex_float* e, lapack_complex_float* b, lapack_int ldb );
lapack_int LAPACKE_zptsv( int matrix_order, lapack_int n, lapack_int nrhs, double* d, lapack_complex_double* e, lapack_complex_double* b, lapack_int ldb );
lapack_int LAPACKE_sptsvx( int matrix_order, char fact, lapack_int n, lapack_int nrhs, const float* d, const float* e, float* df, float* ef, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr );
lapack_int LAPACKE_dptsvx( int matrix_order, char fact, lapack_int n, lapack_int nrhs, const double* d, const double* e, double* df, double* ef, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr );
lapack_int LAPACKE_cptsvx( int matrix_order, char fact, lapack_int n, lapack_int nrhs, const float* d, const lapack_complex_float* e, float* df, lapack_complex_float* ef, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr );
lapack_int LAPACKE_zptsvx( int matrix_order, char fact, lapack_int n, lapack_int nrhs, const double* d, const lapack_complex_double* e, double* df, lapack_complex_double* ef, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr );
/* ?pttrf / ?pttrs: factorization / solve for PD tridiagonal systems. */
lapack_int LAPACKE_spttrf( lapack_int n, float* d, float* e );
lapack_int LAPACKE_dpttrf( lapack_int n, double* d, double* e );
lapack_int LAPACKE_cpttrf( lapack_int n, float* d, lapack_complex_float* e );
lapack_int LAPACKE_zpttrf( lapack_int n, double* d, lapack_complex_double* e );
lapack_int LAPACKE_spttrs( int matrix_order, lapack_int n, lapack_int nrhs, const float* d, const float* e, float* b, lapack_int ldb );
lapack_int LAPACKE_dpttrs( int matrix_order, lapack_int n, lapack_int nrhs, const double* d, const double* e, double* b, lapack_int ldb );
lapack_int LAPACKE_cpttrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* d, const lapack_complex_float* e, lapack_complex_float* b, lapack_int ldb );
lapack_int LAPACKE_zpttrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* d, const lapack_complex_double* e, lapack_complex_double*
b, lapack_int ldb );
/* ?sbev / ?sbevd / ?sbevx: eigensolvers for symmetric banded matrices. */
lapack_int LAPACKE_ssbev( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, float* ab, lapack_int ldab, float* w, float* z, lapack_int ldz );
lapack_int LAPACKE_dsbev( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, double* ab, lapack_int ldab, double* w, double* z, lapack_int ldz );
lapack_int LAPACKE_ssbevd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, float* ab, lapack_int ldab, float* w, float* z, lapack_int ldz );
lapack_int LAPACKE_dsbevd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, double* ab, lapack_int ldab, double* w, double* z, lapack_int ldz );
lapack_int LAPACKE_ssbevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int kd, float* ab, lapack_int ldab, float* q, lapack_int ldq, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* ifail );
lapack_int LAPACKE_dsbevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int kd, double* ab, lapack_int ldab, double* q, lapack_int ldq, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* ifail );
/* ?sbgst / ?sbgv*: generalized symmetric-banded eigenproblems. */
lapack_int LAPACKE_ssbgst( int matrix_order, char vect, char uplo, lapack_int n, lapack_int ka, lapack_int kb, float* ab, lapack_int ldab, const float* bb, lapack_int ldbb, float* x, lapack_int ldx );
lapack_int LAPACKE_dsbgst( int matrix_order, char vect, char uplo, lapack_int n, lapack_int ka, lapack_int kb, double* ab, lapack_int ldab, const double* bb, lapack_int ldbb, double* x, lapack_int ldx );
lapack_int LAPACKE_ssbgv( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, float* ab, lapack_int ldab, float* bb, lapack_int ldbb, float* w, float* z, lapack_int ldz );
lapack_int LAPACKE_dsbgv( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, double* ab, lapack_int ldab, double* bb, lapack_int ldbb, double* w, double* z, lapack_int ldz );
lapack_int LAPACKE_ssbgvd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, float* ab, lapack_int ldab, float* bb, lapack_int ldbb, float* w, float* z, lapack_int ldz );
lapack_int LAPACKE_dsbgvd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, double* ab, lapack_int ldab, double* bb, lapack_int ldbb, double* w, double* z, lapack_int ldz );
lapack_int LAPACKE_ssbgvx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int ka, lapack_int kb, float* ab, lapack_int ldab, float* bb, lapack_int ldbb, float* q, lapack_int ldq, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* ifail );
lapack_int LAPACKE_dsbgvx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int ka, lapack_int kb, double* ab, lapack_int ldab, double* bb, lapack_int ldbb, double* q, lapack_int ldq, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* ifail );
/* ?sbtrd: reduce a symmetric banded matrix to tridiagonal form. */
lapack_int LAPACKE_ssbtrd( int matrix_order, char vect, char uplo, lapack_int n, lapack_int kd, float* ab, lapack_int ldab, float* d, float* e, float* q, lapack_int ldq );
lapack_int LAPACKE_dsbtrd( int matrix_order, char vect, char uplo, lapack_int n, lapack_int kd, double* ab, lapack_int ldab, double* d, double* e, double* q, lapack_int ldq );
/* ?sfrk: symmetric rank-k update for matrices in RFP format. */
lapack_int LAPACKE_ssfrk( int matrix_order, char transr, char uplo, char trans, lapack_int n, lapack_int k, float alpha, const float* a, lapack_int lda, float beta, float* c );
lapack_int LAPACKE_dsfrk( int matrix_order, char transr, char uplo, char trans, lapack_int n, lapack_int k, double alpha, const double* a, lapack_int lda, double beta, double* c );
/* ?spcon: condition estimate for symmetric packed A (uses ?sptrf factors). */
lapack_int LAPACKE_sspcon( int matrix_order, char uplo, lapack_int n, const float* ap,
const lapack_int* ipiv, float anorm, float* rcond );
lapack_int LAPACKE_dspcon( int matrix_order, char uplo, lapack_int n, const double* ap, const lapack_int* ipiv, double anorm, double* rcond );
lapack_int LAPACKE_cspcon( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, const lapack_int* ipiv, float anorm, float* rcond );
lapack_int LAPACKE_zspcon( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, const lapack_int* ipiv, double anorm, double* rcond );
/* ?spev / ?spevd / ?spevx: eigensolvers for symmetric packed matrices. */
lapack_int LAPACKE_sspev( int matrix_order, char jobz, char uplo, lapack_int n, float* ap, float* w, float* z, lapack_int ldz );
lapack_int LAPACKE_dspev( int matrix_order, char jobz, char uplo, lapack_int n, double* ap, double* w, double* z, lapack_int ldz );
lapack_int LAPACKE_sspevd( int matrix_order, char jobz, char uplo, lapack_int n, float* ap, float* w, float* z, lapack_int ldz );
lapack_int LAPACKE_dspevd( int matrix_order, char jobz, char uplo, lapack_int n, double* ap, double* w, double* z, lapack_int ldz );
lapack_int LAPACKE_sspevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, float* ap, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* ifail );
lapack_int LAPACKE_dspevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, double* ap, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* ifail );
/* ?spgst / ?spgv*: generalized symmetric-packed eigenproblems. */
lapack_int LAPACKE_sspgst( int matrix_order, lapack_int itype, char uplo, lapack_int n, float* ap, const float* bp );
lapack_int LAPACKE_dspgst( int matrix_order, lapack_int itype, char uplo, lapack_int n, double* ap, const double* bp );
lapack_int LAPACKE_sspgv( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, float* ap, float* bp, float* w, float* z, lapack_int ldz );
lapack_int LAPACKE_dspgv( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, double* ap, double* bp, double* w, double* z, lapack_int ldz );
lapack_int LAPACKE_sspgvd( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, float* ap, float* bp, float* w, float* z, lapack_int ldz );
lapack_int LAPACKE_dspgvd( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, double* ap, double* bp, double* w, double* z, lapack_int ldz );
lapack_int LAPACKE_sspgvx( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, float* ap, float* bp, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* ifail );
lapack_int LAPACKE_dspgvx( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, double* ap, double* bp, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* ifail );
/* ?sprfs: iterative refinement for symmetric packed systems. */
lapack_int LAPACKE_ssprfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* ap, const float* afp, const lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr );
lapack_int LAPACKE_dsprfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* ap, const double* afp, const lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr );
lapack_int LAPACKE_csprfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_complex_float* afp, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr );
lapack_int LAPACKE_zsprfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_complex_double* afp, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double*
berr );
/* ?spsv / ?spsvx: simple / expert drivers for symmetric indefinite packed
 * systems. */
lapack_int LAPACKE_sspsv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* ap, lapack_int* ipiv, float* b, lapack_int ldb );
lapack_int LAPACKE_dspsv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* ap, lapack_int* ipiv, double* b, lapack_int ldb );
lapack_int LAPACKE_cspsv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* ap, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb );
lapack_int LAPACKE_zspsv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* ap, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb );
lapack_int LAPACKE_sspsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const float* ap, float* afp, lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr );
lapack_int LAPACKE_dspsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const double* ap, double* afp, lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr );
lapack_int LAPACKE_cspsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, lapack_complex_float* afp, lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr );
lapack_int LAPACKE_zspsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, lapack_complex_double* afp, lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr );
/* ?sptrd: reduce a symmetric packed matrix to tridiagonal form. */
lapack_int LAPACKE_ssptrd( int matrix_order, char uplo, lapack_int n, float* ap, float* d, float* e, float* tau );
lapack_int LAPACKE_dsptrd( int matrix_order, char uplo, lapack_int n, double* ap, double* d, double* e, double* tau );
/* ?sptrf / ?sptri / ?sptrs: diagonal-pivoting (Bunch-Kaufman) factorization /
 * inverse / solve, packed storage. */
lapack_int LAPACKE_ssptrf( int matrix_order, char uplo, lapack_int n, float* ap, lapack_int* ipiv );
lapack_int LAPACKE_dsptrf( int matrix_order, char uplo, lapack_int n, double* ap, lapack_int* ipiv );
lapack_int LAPACKE_csptrf( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap, lapack_int* ipiv );
lapack_int LAPACKE_zsptrf( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap, lapack_int* ipiv );
lapack_int LAPACKE_ssptri( int matrix_order, char uplo, lapack_int n, float* ap, const lapack_int* ipiv );
lapack_int LAPACKE_dsptri( int matrix_order, char uplo, lapack_int n, double* ap, const lapack_int* ipiv );
lapack_int LAPACKE_csptri( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap, const lapack_int* ipiv );
lapack_int LAPACKE_zsptri( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap, const lapack_int* ipiv );
lapack_int LAPACKE_ssptrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* ap, const lapack_int* ipiv, float* b, lapack_int ldb );
lapack_int LAPACKE_dsptrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* ap, const lapack_int* ipiv, double* b, lapack_int ldb );
lapack_int LAPACKE_csptrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb );
lapack_int LAPACKE_zsptrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb );
/* ?stebz: eigenvalues of a symmetric tridiagonal matrix by bisection. */
lapack_int LAPACKE_sstebz( char range, char order, lapack_int n, float vl, float vu, lapack_int il, lapack_int iu, float abstol, const float* d, const float* e, lapack_int* m, lapack_int* nsplit, float* w, lapack_int* iblock, lapack_int* isplit );
lapack_int LAPACKE_dstebz( char range, char order, lapack_int n, double vl, double vu, lapack_int il, lapack_int iu, double abstol, const double* d, const double* e,
lapack_int* m, lapack_int* nsplit, double* w, lapack_int* iblock, lapack_int* isplit );
/* ?stedc: divide-and-conquer eigensolver for symmetric tridiagonal A. */
lapack_int LAPACKE_sstedc( int matrix_order, char compz, lapack_int n, float* d, float* e, float* z, lapack_int ldz );
lapack_int LAPACKE_dstedc( int matrix_order, char compz, lapack_int n, double* d, double* e, double* z, lapack_int ldz );
lapack_int LAPACKE_cstedc( int matrix_order, char compz, lapack_int n, float* d, float* e, lapack_complex_float* z, lapack_int ldz );
lapack_int LAPACKE_zstedc( int matrix_order, char compz, lapack_int n, double* d, double* e, lapack_complex_double* z, lapack_int ldz );
/* ?stegr: MRRR eigensolver for symmetric tridiagonal A. */
lapack_int LAPACKE_sstegr( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* isuppz );
lapack_int LAPACKE_dstegr( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* isuppz );
lapack_int LAPACKE_cstegr( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int* isuppz );
lapack_int LAPACKE_zstegr( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int* isuppz );
/* ?stein: eigenvectors by inverse iteration for selected eigenvalues. */
lapack_int LAPACKE_sstein( int matrix_order, lapack_int n, const float* d, const float* e, lapack_int m, const float* w, const lapack_int* iblock, const lapack_int* isplit, float* z, lapack_int ldz, lapack_int* ifailv );
lapack_int LAPACKE_dstein( int matrix_order, lapack_int n, const double* d, const double* e, lapack_int m, const double* w, const lapack_int* iblock, const lapack_int* isplit, double* z, lapack_int ldz, lapack_int* ifailv );
lapack_int LAPACKE_cstein( int matrix_order, lapack_int n, const float* d, const float* e, lapack_int m, const float* w, const lapack_int* iblock, const lapack_int* isplit, lapack_complex_float* z, lapack_int ldz, lapack_int* ifailv );
lapack_int LAPACKE_zstein( int matrix_order, lapack_int n, const double* d, const double* e, lapack_int m, const double* w, const lapack_int* iblock, const lapack_int* isplit, lapack_complex_double* z, lapack_int ldz, lapack_int* ifailv );
/* ?stemr: eigenvalues/eigenvectors of symmetric tridiagonal A (MRRR). */
lapack_int LAPACKE_sstemr( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int nzc, lapack_int* isuppz, lapack_logical* tryrac );
lapack_int LAPACKE_dstemr( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int nzc, lapack_int* isuppz, lapack_logical* tryrac );
lapack_int LAPACKE_cstemr( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int nzc, lapack_int* isuppz, lapack_logical* tryrac );
lapack_int LAPACKE_zstemr( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int nzc, lapack_int* isuppz, lapack_logical* tryrac );
/* ?steqr: implicit QL/QR eigensolver for symmetric tridiagonal A. */
lapack_int LAPACKE_ssteqr( int matrix_order, char compz, lapack_int n, float* d, float* e, float* z, lapack_int ldz );
lapack_int LAPACKE_dsteqr( int matrix_order, char compz, lapack_int n, double* d, double* e, double* z, lapack_int ldz );
lapack_int LAPACKE_csteqr( int matrix_order, char compz, lapack_int n, float* d, float* e, lapack_complex_float* z, lapack_int ldz
);
lapack_int LAPACKE_zsteqr( int matrix_order, char compz, lapack_int n, double* d, double* e, lapack_complex_double* z, lapack_int ldz );
/* ?sterf: all eigenvalues of a symmetric tridiagonal matrix (no vectors). */
lapack_int LAPACKE_ssterf( lapack_int n, float* d, float* e );
lapack_int LAPACKE_dsterf( lapack_int n, double* d, double* e );
/* ?stev / ?stevd / ?stevr / ?stevx: symmetric tridiagonal eigen drivers. */
lapack_int LAPACKE_sstev( int matrix_order, char jobz, lapack_int n, float* d, float* e, float* z, lapack_int ldz );
lapack_int LAPACKE_dstev( int matrix_order, char jobz, lapack_int n, double* d, double* e, double* z, lapack_int ldz );
lapack_int LAPACKE_sstevd( int matrix_order, char jobz, lapack_int n, float* d, float* e, float* z, lapack_int ldz );
lapack_int LAPACKE_dstevd( int matrix_order, char jobz, lapack_int n, double* d, double* e, double* z, lapack_int ldz );
lapack_int LAPACKE_sstevr( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* isuppz );
lapack_int LAPACKE_dstevr( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* isuppz );
lapack_int LAPACKE_sstevx( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* ifail );
lapack_int LAPACKE_dstevx( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* ifail );
/* ?sycon: condition estimate for symmetric indefinite A (uses ?sytrf factors). */
lapack_int LAPACKE_ssycon( int matrix_order, char uplo, lapack_int n, const float* a, lapack_int lda, const lapack_int* ipiv, float anorm, float* rcond );
lapack_int LAPACKE_dsycon( int matrix_order, char uplo, lapack_int n, const double* a, lapack_int lda, const lapack_int* ipiv, double anorm, double* rcond );
lapack_int LAPACKE_csycon( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, float anorm, float* rcond );
lapack_int LAPACKE_zsycon( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, double anorm, double* rcond );
/* ?syequb: equilibration scaling factors for a symmetric matrix. */
lapack_int LAPACKE_ssyequb( int matrix_order, char uplo, lapack_int n, const float* a, lapack_int lda, float* s, float* scond, float* amax );
lapack_int LAPACKE_dsyequb( int matrix_order, char uplo, lapack_int n, const double* a, lapack_int lda, double* s, double* scond, double* amax );
lapack_int LAPACKE_csyequb( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* s, float* scond, float* amax );
lapack_int LAPACKE_zsyequb( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* s, double* scond, double* amax );
/* ?syev / ?syevd / ?syevr / ?syevx: symmetric dense eigen drivers. */
lapack_int LAPACKE_ssyev( int matrix_order, char jobz, char uplo, lapack_int n, float* a, lapack_int lda, float* w );
lapack_int LAPACKE_dsyev( int matrix_order, char jobz, char uplo, lapack_int n, double* a, lapack_int lda, double* w );
lapack_int LAPACKE_ssyevd( int matrix_order, char jobz, char uplo, lapack_int n, float* a, lapack_int lda, float* w );
lapack_int LAPACKE_dsyevd( int matrix_order, char jobz, char uplo, lapack_int n, double* a, lapack_int lda, double* w );
lapack_int LAPACKE_ssyevr( int matrix_order, char jobz, char range, char uplo, lapack_int n, float* a, lapack_int lda, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* isuppz );
lapack_int LAPACKE_dsyevr( int matrix_order, char jobz, char range, char uplo, lapack_int n, double* a, lapack_int lda, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* isuppz );
lapack_int
LAPACKE_ssyevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, float* a, lapack_int lda, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* ifail );
lapack_int LAPACKE_dsyevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, double* a, lapack_int lda, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* ifail );
/* ?sygst / ?sygv*: generalized symmetric-definite eigenproblems. */
lapack_int LAPACKE_ssygst( int matrix_order, lapack_int itype, char uplo, lapack_int n, float* a, lapack_int lda, const float* b, lapack_int ldb );
lapack_int LAPACKE_dsygst( int matrix_order, lapack_int itype, char uplo, lapack_int n, double* a, lapack_int lda, const double* b, lapack_int ldb );
lapack_int LAPACKE_ssygv( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* w );
lapack_int LAPACKE_dsygv( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* w );
lapack_int LAPACKE_ssygvd( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* w );
lapack_int LAPACKE_dsygvd( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* w );
lapack_int LAPACKE_ssygvx( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* ifail );
lapack_int LAPACKE_dsygvx( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* ifail );
/* ?syrfs / ?syrfsx: iterative refinement for symmetric indefinite systems. */
lapack_int LAPACKE_ssyrfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr );
lapack_int LAPACKE_dsyrfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr );
lapack_int LAPACKE_csyrfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr );
lapack_int LAPACKE_zsyrfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr );
lapack_int LAPACKE_ssyrfsx( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const lapack_int* ipiv, const float* s, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params );
lapack_int LAPACKE_dsyrfsx( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const lapack_int* ipiv, const double* s, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm,
double* err_bnds_comp, lapack_int nparams, double* params );
lapack_int LAPACKE_csyrfsx( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const float* s, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params );
lapack_int LAPACKE_zsyrfsx( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const double* s, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params );
/* ?sysv / ?sysvx / ?sysvxx: simple / expert / extra-precise drivers for
 * symmetric indefinite systems. */
lapack_int LAPACKE_ssysv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, lapack_int* ipiv, float* b, lapack_int ldb );
lapack_int LAPACKE_dsysv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, lapack_int* ipiv, double* b, lapack_int ldb );
lapack_int LAPACKE_csysv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb );
lapack_int LAPACKE_zsysv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb );
lapack_int LAPACKE_ssysvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, float* af, lapack_int ldaf, lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr );
lapack_int LAPACKE_dsysvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, double* af, lapack_int ldaf, lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr );
lapack_int LAPACKE_csysvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr );
lapack_int LAPACKE_zsysvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr );
lapack_int LAPACKE_ssysvxx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* s, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params );
lapack_int LAPACKE_dsysvxx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* s, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params );
lapack_int LAPACKE_csysvxx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* s, lapack_complex_float* b, lapack_int ldb, lapack_complex_float*
x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params );
lapack_int LAPACKE_zsysvxx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params );
/* ?sytrd: reduce a dense symmetric matrix to tridiagonal form. */
lapack_int LAPACKE_ssytrd( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, float* d, float* e, float* tau );
lapack_int LAPACKE_dsytrd( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, double* d, double* e, double* tau );
/* ?sytrf / ?sytri / ?sytrs: diagonal-pivoting (Bunch-Kaufman) factorization /
 * inverse / solve, dense storage. */
lapack_int LAPACKE_ssytrf( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, lapack_int* ipiv );
lapack_int LAPACKE_dsytrf( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, lapack_int* ipiv );
lapack_int LAPACKE_csytrf( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv );
lapack_int LAPACKE_zsytrf( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv );
lapack_int LAPACKE_ssytri( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, const lapack_int* ipiv );
lapack_int LAPACKE_dsytri( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, const lapack_int* ipiv );
lapack_int LAPACKE_csytri( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv );
lapack_int LAPACKE_zsytri( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv );
lapack_int LAPACKE_ssytrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const lapack_int* ipiv, float* b, lapack_int ldb );
lapack_int LAPACKE_dsytrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const lapack_int* ipiv, double* b, lapack_int ldb );
lapack_int LAPACKE_csytrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb );
lapack_int LAPACKE_zsytrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb );
/* ?tbcon / ?tbrfs / ?tbtrs: triangular banded condition estimate /
 * refinement / solve. */
lapack_int LAPACKE_stbcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, lapack_int kd, const float* ab, lapack_int ldab, float* rcond );
lapack_int LAPACKE_dtbcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, lapack_int kd, const double* ab, lapack_int ldab, double* rcond );
lapack_int LAPACKE_ctbcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, lapack_int kd, const lapack_complex_float* ab, lapack_int ldab, float* rcond );
lapack_int LAPACKE_ztbcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, lapack_int kd, const lapack_complex_double* ab, lapack_int ldab, double* rcond );
lapack_int LAPACKE_stbrfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const float* ab, lapack_int ldab, const float* b, lapack_int ldb, const float* x, lapack_int ldx, float* ferr, float* berr );
lapack_int LAPACKE_dtbrfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const double* ab, lapack_int ldab, const double* b, lapack_int ldb, const double* x, lapack_int ldx, double* ferr, double* berr );
lapack_int LAPACKE_ctbrfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, const lapack_complex_float* b, lapack_int ldb, const lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr );
lapack_int LAPACKE_ztbrfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, const lapack_complex_double* b, lapack_int ldb, const lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr );
lapack_int LAPACKE_stbtrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const float* ab, lapack_int ldab, float* b, lapack_int ldb );
lapack_int LAPACKE_dtbtrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const double* ab, lapack_int ldab, double* b, lapack_int ldb );
lapack_int LAPACKE_ctbtrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* b, lapack_int ldb );
lapack_int LAPACKE_ztbtrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* b, lapack_int ldb );
/* ?tfsm: triangular solve with the triangular matrix in RFP format. */
lapack_int LAPACKE_stfsm( int matrix_order, char transr, char side, char uplo, char trans, char diag, lapack_int m, lapack_int n, float alpha, const float* a, float* b, lapack_int ldb );
lapack_int LAPACKE_dtfsm( int matrix_order, char transr, char side, char uplo, char trans, char diag, lapack_int m, lapack_int n, double alpha, const double* a, double* b, lapack_int ldb );
lapack_int LAPACKE_ctfsm( int matrix_order, char transr, char side, char uplo, char trans, char diag, lapack_int m, lapack_int n, lapack_complex_float alpha, const lapack_complex_float* a, lapack_complex_float* b, lapack_int ldb );
lapack_int LAPACKE_ztfsm( int matrix_order, char transr, char side, char uplo, char trans, char diag, lapack_int m, lapack_int n,
lapack_complex_double alpha, const lapack_complex_double* a, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_stftri( int matrix_order, char transr, char uplo, char diag, lapack_int n, float* a ); lapack_int LAPACKE_dtftri( int matrix_order, char transr, char uplo, char diag, lapack_int n, double* a ); lapack_int LAPACKE_ctftri( int matrix_order, char transr, char uplo, char diag, lapack_int n, lapack_complex_float* a ); lapack_int LAPACKE_ztftri( int matrix_order, char transr, char uplo, char diag, lapack_int n, lapack_complex_double* a ); lapack_int LAPACKE_stfttp( int matrix_order, char transr, char uplo, lapack_int n, const float* arf, float* ap ); lapack_int LAPACKE_dtfttp( int matrix_order, char transr, char uplo, lapack_int n, const double* arf, double* ap ); lapack_int LAPACKE_ctfttp( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_float* arf, lapack_complex_float* ap ); lapack_int LAPACKE_ztfttp( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_double* arf, lapack_complex_double* ap ); lapack_int LAPACKE_stfttr( int matrix_order, char transr, char uplo, lapack_int n, const float* arf, float* a, lapack_int lda ); lapack_int LAPACKE_dtfttr( int matrix_order, char transr, char uplo, lapack_int n, const double* arf, double* a, lapack_int lda ); lapack_int LAPACKE_ctfttr( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_float* arf, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_ztfttr( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_double* arf, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_stgevc( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, const float* s, lapack_int lds, const float* p, lapack_int ldp, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_dtgevc( int matrix_order, char side, char howmny, 
const lapack_logical* select, lapack_int n, const double* s, lapack_int lds, const double* p, lapack_int ldp, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_ctgevc( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_float* s, lapack_int lds, const lapack_complex_float* p, lapack_int ldp, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_ztgevc( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_double* s, lapack_int lds, const lapack_complex_double* p, lapack_int ldp, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_stgexc( int matrix_order, lapack_logical wantq, lapack_logical wantz, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* q, lapack_int ldq, float* z, lapack_int ldz, lapack_int* ifst, lapack_int* ilst ); lapack_int LAPACKE_dtgexc( int matrix_order, lapack_logical wantq, lapack_logical wantz, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* q, lapack_int ldq, double* z, lapack_int ldz, lapack_int* ifst, lapack_int* ilst ); lapack_int LAPACKE_ctgexc( int matrix_order, lapack_logical wantq, lapack_logical wantz, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* z, lapack_int ldz, lapack_int ifst, lapack_int ilst ); lapack_int LAPACKE_ztgexc( int matrix_order, lapack_logical wantq, lapack_logical wantz, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* z, lapack_int ldz, lapack_int ifst, lapack_int ilst ); lapack_int LAPACKE_stgsen( int 
matrix_order, lapack_int ijob, lapack_logical wantq, lapack_logical wantz, const lapack_logical* select, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* alphar, float* alphai, float* beta, float* q, lapack_int ldq, float* z, lapack_int ldz, lapack_int* m, float* pl, float* pr, float* dif ); lapack_int LAPACKE_dtgsen( int matrix_order, lapack_int ijob, lapack_logical wantq, lapack_logical wantz, const lapack_logical* select, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* alphar, double* alphai, double* beta, double* q, lapack_int ldq, double* z, lapack_int ldz, lapack_int* m, double* pl, double* pr, double* dif ); lapack_int LAPACKE_ctgsen( int matrix_order, lapack_int ijob, lapack_logical wantq, lapack_logical wantz, const lapack_logical* select, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* z, lapack_int ldz, lapack_int* m, float* pl, float* pr, float* dif ); lapack_int LAPACKE_ztgsen( int matrix_order, lapack_int ijob, lapack_logical wantq, lapack_logical wantz, const lapack_logical* select, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* z, lapack_int ldz, lapack_int* m, double* pl, double* pr, double* dif ); lapack_int LAPACKE_stgsja( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_int k, lapack_int l, float* a, lapack_int lda, float* b, lapack_int ldb, float tola, float tolb, float* alpha, float* beta, float* u, lapack_int ldu, float* v, lapack_int ldv, float* q, lapack_int ldq, lapack_int* ncycle ); lapack_int LAPACKE_dtgsja( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, 
lapack_int k, lapack_int l, double* a, lapack_int lda, double* b, lapack_int ldb, double tola, double tolb, double* alpha, double* beta, double* u, lapack_int ldu, double* v, lapack_int ldv, double* q, lapack_int ldq, lapack_int* ncycle ); lapack_int LAPACKE_ctgsja( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_int k, lapack_int l, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float tola, float tolb, float* alpha, float* beta, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* v, lapack_int ldv, lapack_complex_float* q, lapack_int ldq, lapack_int* ncycle ); lapack_int LAPACKE_ztgsja( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_int k, lapack_int l, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double tola, double tolb, double* alpha, double* beta, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* v, lapack_int ldv, lapack_complex_double* q, lapack_int ldq, lapack_int* ncycle ); lapack_int LAPACKE_stgsna( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const float* a, lapack_int lda, const float* b, lapack_int ldb, const float* vl, lapack_int ldvl, const float* vr, lapack_int ldvr, float* s, float* dif, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_dtgsna( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const double* a, lapack_int lda, const double* b, lapack_int ldb, const double* vl, lapack_int ldvl, const double* vr, lapack_int ldvr, double* s, double* dif, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_ctgsna( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* b, lapack_int ldb, const lapack_complex_float* vl, lapack_int ldvl, const lapack_complex_float* vr, 
lapack_int ldvr, float* s, float* dif, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_ztgsna( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* b, lapack_int ldb, const lapack_complex_double* vl, lapack_int ldvl, const lapack_complex_double* vr, lapack_int ldvr, double* s, double* dif, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_stgsyl( int matrix_order, char trans, lapack_int ijob, lapack_int m, lapack_int n, const float* a, lapack_int lda, const float* b, lapack_int ldb, float* c, lapack_int ldc, const float* d, lapack_int ldd, const float* e, lapack_int lde, float* f, lapack_int ldf, float* scale, float* dif ); lapack_int LAPACKE_dtgsyl( int matrix_order, char trans, lapack_int ijob, lapack_int m, lapack_int n, const double* a, lapack_int lda, const double* b, lapack_int ldb, double* c, lapack_int ldc, const double* d, lapack_int ldd, const double* e, lapack_int lde, double* f, lapack_int ldf, double* scale, double* dif ); lapack_int LAPACKE_ctgsyl( int matrix_order, char trans, lapack_int ijob, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* c, lapack_int ldc, const lapack_complex_float* d, lapack_int ldd, const lapack_complex_float* e, lapack_int lde, lapack_complex_float* f, lapack_int ldf, float* scale, float* dif ); lapack_int LAPACKE_ztgsyl( int matrix_order, char trans, lapack_int ijob, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* c, lapack_int ldc, const lapack_complex_double* d, lapack_int ldd, const lapack_complex_double* e, lapack_int lde, lapack_complex_double* f, lapack_int ldf, double* scale, double* dif ); lapack_int LAPACKE_stpcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, const float* ap, float* rcond ); 
lapack_int LAPACKE_dtpcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, const double* ap, double* rcond ); lapack_int LAPACKE_ctpcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, const lapack_complex_float* ap, float* rcond ); lapack_int LAPACKE_ztpcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, const lapack_complex_double* ap, double* rcond ); lapack_int LAPACKE_stprfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const float* ap, const float* b, lapack_int ldb, const float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_dtprfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const double* ap, const double* b, lapack_int ldb, const double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_ctprfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_complex_float* b, lapack_int ldb, const lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_ztprfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_complex_double* b, lapack_int ldb, const lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_stptri( int matrix_order, char uplo, char diag, lapack_int n, float* ap ); lapack_int LAPACKE_dtptri( int matrix_order, char uplo, char diag, lapack_int n, double* ap ); lapack_int LAPACKE_ctptri( int matrix_order, char uplo, char diag, lapack_int n, lapack_complex_float* ap ); lapack_int LAPACKE_ztptri( int matrix_order, char uplo, char diag, lapack_int n, lapack_complex_double* ap ); lapack_int LAPACKE_stptrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const float* ap, float* b, lapack_int ldb ); lapack_int LAPACKE_dtptrs( int matrix_order, char 
uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const double* ap, double* b, lapack_int ldb ); lapack_int LAPACKE_ctptrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_ztptrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_stpttf( int matrix_order, char transr, char uplo, lapack_int n, const float* ap, float* arf ); lapack_int LAPACKE_dtpttf( int matrix_order, char transr, char uplo, lapack_int n, const double* ap, double* arf ); lapack_int LAPACKE_ctpttf( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_float* ap, lapack_complex_float* arf ); lapack_int LAPACKE_ztpttf( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_double* ap, lapack_complex_double* arf ); lapack_int LAPACKE_stpttr( int matrix_order, char uplo, lapack_int n, const float* ap, float* a, lapack_int lda ); lapack_int LAPACKE_dtpttr( int matrix_order, char uplo, lapack_int n, const double* ap, double* a, lapack_int lda ); lapack_int LAPACKE_ctpttr( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_ztpttr( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_strcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, const float* a, lapack_int lda, float* rcond ); lapack_int LAPACKE_dtrcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, const double* a, lapack_int lda, double* rcond ); lapack_int LAPACKE_ctrcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* rcond ); lapack_int LAPACKE_ztrcon( int 
matrix_order, char norm, char uplo, char diag, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* rcond ); lapack_int LAPACKE_strevc( int matrix_order, char side, char howmny, lapack_logical* select, lapack_int n, const float* t, lapack_int ldt, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_dtrevc( int matrix_order, char side, char howmny, lapack_logical* select, lapack_int n, const double* t, lapack_int ldt, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_ctrevc( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, lapack_complex_float* t, lapack_int ldt, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_ztrevc( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, lapack_complex_double* t, lapack_int ldt, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_strexc( int matrix_order, char compq, lapack_int n, float* t, lapack_int ldt, float* q, lapack_int ldq, lapack_int* ifst, lapack_int* ilst ); lapack_int LAPACKE_dtrexc( int matrix_order, char compq, lapack_int n, double* t, lapack_int ldt, double* q, lapack_int ldq, lapack_int* ifst, lapack_int* ilst ); lapack_int LAPACKE_ctrexc( int matrix_order, char compq, lapack_int n, lapack_complex_float* t, lapack_int ldt, lapack_complex_float* q, lapack_int ldq, lapack_int ifst, lapack_int ilst ); lapack_int LAPACKE_ztrexc( int matrix_order, char compq, lapack_int n, lapack_complex_double* t, lapack_int ldt, lapack_complex_double* q, lapack_int ldq, lapack_int ifst, lapack_int ilst ); lapack_int LAPACKE_strrfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* b, 
lapack_int ldb, const float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_dtrrfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* b, lapack_int ldb, const double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_ctrrfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* b, lapack_int ldb, const lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_ztrrfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* b, lapack_int ldb, const lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_strsen( int matrix_order, char job, char compq, const lapack_logical* select, lapack_int n, float* t, lapack_int ldt, float* q, lapack_int ldq, float* wr, float* wi, lapack_int* m, float* s, float* sep ); lapack_int LAPACKE_dtrsen( int matrix_order, char job, char compq, const lapack_logical* select, lapack_int n, double* t, lapack_int ldt, double* q, lapack_int ldq, double* wr, double* wi, lapack_int* m, double* s, double* sep ); lapack_int LAPACKE_ctrsen( int matrix_order, char job, char compq, const lapack_logical* select, lapack_int n, lapack_complex_float* t, lapack_int ldt, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* w, lapack_int* m, float* s, float* sep ); lapack_int LAPACKE_ztrsen( int matrix_order, char job, char compq, const lapack_logical* select, lapack_int n, lapack_complex_double* t, lapack_int ldt, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* w, lapack_int* m, double* s, double* sep ); lapack_int LAPACKE_strsna( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const float* t, lapack_int ldt, 
const float* vl, lapack_int ldvl, const float* vr, lapack_int ldvr, float* s, float* sep, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_dtrsna( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const double* t, lapack_int ldt, const double* vl, lapack_int ldvl, const double* vr, lapack_int ldvr, double* s, double* sep, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_ctrsna( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_float* t, lapack_int ldt, const lapack_complex_float* vl, lapack_int ldvl, const lapack_complex_float* vr, lapack_int ldvr, float* s, float* sep, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_ztrsna( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_double* t, lapack_int ldt, const lapack_complex_double* vl, lapack_int ldvl, const lapack_complex_double* vr, lapack_int ldvr, double* s, double* sep, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_strsyl( int matrix_order, char trana, char tranb, lapack_int isgn, lapack_int m, lapack_int n, const float* a, lapack_int lda, const float* b, lapack_int ldb, float* c, lapack_int ldc, float* scale ); lapack_int LAPACKE_dtrsyl( int matrix_order, char trana, char tranb, lapack_int isgn, lapack_int m, lapack_int n, const double* a, lapack_int lda, const double* b, lapack_int ldb, double* c, lapack_int ldc, double* scale ); lapack_int LAPACKE_ctrsyl( int matrix_order, char trana, char tranb, lapack_int isgn, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* c, lapack_int ldc, float* scale ); lapack_int LAPACKE_ztrsyl( int matrix_order, char trana, char tranb, lapack_int isgn, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* c, lapack_int ldc, double* scale ); 
lapack_int LAPACKE_strtri( int matrix_order, char uplo, char diag, lapack_int n, float* a, lapack_int lda ); lapack_int LAPACKE_dtrtri( int matrix_order, char uplo, char diag, lapack_int n, double* a, lapack_int lda ); lapack_int LAPACKE_ctrtri( int matrix_order, char uplo, char diag, lapack_int n, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_ztrtri( int matrix_order, char uplo, char diag, lapack_int n, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_strtrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, float* b, lapack_int ldb ); lapack_int LAPACKE_dtrtrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, double* b, lapack_int ldb ); lapack_int LAPACKE_ctrtrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_ztrtrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_strttf( int matrix_order, char transr, char uplo, lapack_int n, const float* a, lapack_int lda, float* arf ); lapack_int LAPACKE_dtrttf( int matrix_order, char transr, char uplo, lapack_int n, const double* a, lapack_int lda, double* arf ); lapack_int LAPACKE_ctrttf( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* arf ); lapack_int LAPACKE_ztrttf( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* arf ); lapack_int LAPACKE_strttp( int matrix_order, char uplo, lapack_int n, const float* a, lapack_int lda, float* ap ); lapack_int LAPACKE_dtrttp( int matrix_order, char uplo, lapack_int n, const double* a, 
lapack_int lda, double* ap ); lapack_int LAPACKE_ctrttp( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* ap ); lapack_int LAPACKE_ztrttp( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* ap ); lapack_int LAPACKE_stzrzf( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau ); lapack_int LAPACKE_dtzrzf( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau ); lapack_int LAPACKE_ctzrzf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau ); lapack_int LAPACKE_ztzrzf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau ); lapack_int LAPACKE_cungbr( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int k, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau ); lapack_int LAPACKE_zungbr( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int k, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau ); lapack_int LAPACKE_cunghr( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau ); lapack_int LAPACKE_zunghr( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau ); lapack_int LAPACKE_cunglq( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau ); lapack_int LAPACKE_zunglq( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau ); lapack_int LAPACKE_cungql( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_float* a, lapack_int lda, const 
lapack_complex_float* tau ); lapack_int LAPACKE_zungql( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau ); lapack_int LAPACKE_cungqr( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau ); lapack_int LAPACKE_zungqr( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau ); lapack_int LAPACKE_cungrq( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau ); lapack_int LAPACKE_zungrq( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau ); lapack_int LAPACKE_cungtr( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau ); lapack_int LAPACKE_zungtr( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau ); lapack_int LAPACKE_cunmbr( int matrix_order, char vect, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zunmbr( int matrix_order, char vect, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_cunmhr( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int ilo, lapack_int ihi, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zunmhr( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int ilo, 
lapack_int ihi, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_cunmlq( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zunmlq( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_cunmql( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zunmql( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_cunmqr( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zunmqr( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_cunmrq( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zunmrq( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, 
lapack_int ldc ); lapack_int LAPACKE_cunmrz( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zunmrz( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_cunmtr( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zunmtr( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_cupgtr( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, const lapack_complex_float* tau, lapack_complex_float* q, lapack_int ldq ); lapack_int LAPACKE_zupgtr( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, const lapack_complex_double* tau, lapack_complex_double* q, lapack_int ldq ); lapack_int LAPACKE_cupmtr( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const lapack_complex_float* ap, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zupmtr( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const lapack_complex_double* ap, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_sbdsdc_work( int matrix_order, char uplo, char compq, lapack_int n, float* d, float* e, float* u, lapack_int ldu, float* vt, lapack_int ldvt, float* q, lapack_int* iq, float* work, lapack_int* iwork ); 
lapack_int LAPACKE_dbdsdc_work( int matrix_order, char uplo, char compq, lapack_int n, double* d, double* e, double* u, lapack_int ldu, double* vt, lapack_int ldvt, double* q, lapack_int* iq, double* work, lapack_int* iwork ); lapack_int LAPACKE_sbdsqr_work( int matrix_order, char uplo, lapack_int n, lapack_int ncvt, lapack_int nru, lapack_int ncc, float* d, float* e, float* vt, lapack_int ldvt, float* u, lapack_int ldu, float* c, lapack_int ldc, float* work ); lapack_int LAPACKE_dbdsqr_work( int matrix_order, char uplo, lapack_int n, lapack_int ncvt, lapack_int nru, lapack_int ncc, double* d, double* e, double* vt, lapack_int ldvt, double* u, lapack_int ldu, double* c, lapack_int ldc, double* work ); lapack_int LAPACKE_cbdsqr_work( int matrix_order, char uplo, lapack_int n, lapack_int ncvt, lapack_int nru, lapack_int ncc, float* d, float* e, lapack_complex_float* vt, lapack_int ldvt, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* c, lapack_int ldc, float* work ); lapack_int LAPACKE_zbdsqr_work( int matrix_order, char uplo, lapack_int n, lapack_int ncvt, lapack_int nru, lapack_int ncc, double* d, double* e, lapack_complex_double* vt, lapack_int ldvt, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* c, lapack_int ldc, double* work ); lapack_int LAPACKE_sdisna_work( char job, lapack_int m, lapack_int n, const float* d, float* sep ); lapack_int LAPACKE_ddisna_work( char job, lapack_int m, lapack_int n, const double* d, double* sep ); lapack_int LAPACKE_sgbbrd_work( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int ncc, lapack_int kl, lapack_int ku, float* ab, lapack_int ldab, float* d, float* e, float* q, lapack_int ldq, float* pt, lapack_int ldpt, float* c, lapack_int ldc, float* work ); lapack_int LAPACKE_dgbbrd_work( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int ncc, lapack_int kl, lapack_int ku, double* ab, lapack_int ldab, double* d, double* e, double* q, lapack_int ldq, double* pt, 
lapack_int ldpt, double* c, lapack_int ldc, double* work ); lapack_int LAPACKE_cgbbrd_work( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int ncc, lapack_int kl, lapack_int ku, lapack_complex_float* ab, lapack_int ldab, float* d, float* e, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* pt, lapack_int ldpt, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgbbrd_work( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int ncc, lapack_int kl, lapack_int ku, lapack_complex_double* ab, lapack_int ldab, double* d, double* e, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* pt, lapack_int ldpt, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgbcon_work( int matrix_order, char norm, lapack_int n, lapack_int kl, lapack_int ku, const float* ab, lapack_int ldab, const lapack_int* ipiv, float anorm, float* rcond, float* work, lapack_int* iwork ); lapack_int LAPACKE_dgbcon_work( int matrix_order, char norm, lapack_int n, lapack_int kl, lapack_int ku, const double* ab, lapack_int ldab, const lapack_int* ipiv, double anorm, double* rcond, double* work, lapack_int* iwork ); lapack_int LAPACKE_cgbcon_work( int matrix_order, char norm, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_float* ab, lapack_int ldab, const lapack_int* ipiv, float anorm, float* rcond, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgbcon_work( int matrix_order, char norm, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_double* ab, lapack_int ldab, const lapack_int* ipiv, double anorm, double* rcond, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgbequ_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const float* ab, lapack_int ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_dgbequ_work( int 
matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const double* ab, lapack_int ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_cgbequ_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_float* ab, lapack_int ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_zgbequ_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_double* ab, lapack_int ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_sgbequb_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const float* ab, lapack_int ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_dgbequb_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const double* ab, lapack_int ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_cgbequb_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_float* ab, lapack_int ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_zgbequb_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_double* ab, lapack_int ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_sgbrfs_work( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const float* ab, lapack_int ldab, const float* afb, lapack_int ldafb, const lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dgbrfs_work( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const double* ab, lapack_int ldab, const 
double* afb, lapack_int ldafb, const lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cgbrfs_work( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, const lapack_complex_float* afb, lapack_int ldafb, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgbrfs_work( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, const lapack_complex_double* afb, lapack_int ldafb, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgbrfsx_work( int matrix_order, char trans, char equed, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const float* ab, lapack_int ldab, const float* afb, lapack_int ldafb, const lapack_int* ipiv, const float* r, const float* c, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, float* work, lapack_int* iwork ); lapack_int LAPACKE_dgbrfsx_work( int matrix_order, char trans, char equed, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const double* ab, lapack_int ldab, const double* afb, lapack_int ldafb, const lapack_int* ipiv, const double* r, const double* c, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, double* work, lapack_int* iwork ); lapack_int 
LAPACKE_cgbrfsx_work( int matrix_order, char trans, char equed, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, const lapack_complex_float* afb, lapack_int ldafb, const lapack_int* ipiv, const float* r, const float* c, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgbrfsx_work( int matrix_order, char trans, char equed, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, const lapack_complex_double* afb, lapack_int ldafb, const lapack_int* ipiv, const double* r, const double* c, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgbsv_work( int matrix_order, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, float* ab, lapack_int ldab, lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dgbsv_work( int matrix_order, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, double* ab, lapack_int ldab, lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_cgbsv_work( int matrix_order, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_float* ab, lapack_int ldab, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgbsv_work( int matrix_order, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_double* ab, lapack_int ldab, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sgbsvx_work( int matrix_order, char fact, char trans, lapack_int n, 
lapack_int kl, lapack_int ku, lapack_int nrhs, float* ab, lapack_int ldab, float* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dgbsvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, double* ab, lapack_int ldab, double* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cgbsvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgbsvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgbsvxx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, float* ab, lapack_int ldab, float* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, float* 
work, lapack_int* iwork ); lapack_int LAPACKE_dgbsvxx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, double* ab, lapack_int ldab, double* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, double* work, lapack_int* iwork ); lapack_int LAPACKE_cgbsvxx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgbsvxx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgbtrf_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, float* ab, lapack_int ldab, lapack_int* ipiv ); lapack_int LAPACKE_dgbtrf_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, double* ab, lapack_int ldab, lapack_int* ipiv ); lapack_int LAPACKE_cgbtrf_work( int matrix_order, lapack_int m, 
lapack_int n, lapack_int kl, lapack_int ku, lapack_complex_float* ab, lapack_int ldab, lapack_int* ipiv ); lapack_int LAPACKE_zgbtrf_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, lapack_complex_double* ab, lapack_int ldab, lapack_int* ipiv ); lapack_int LAPACKE_sgbtrs_work( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const float* ab, lapack_int ldab, const lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dgbtrs_work( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const double* ab, lapack_int ldab, const lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_cgbtrs_work( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgbtrs_work( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sgebak_work( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const float* scale, lapack_int m, float* v, lapack_int ldv ); lapack_int LAPACKE_dgebak_work( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const double* scale, lapack_int m, double* v, lapack_int ldv ); lapack_int LAPACKE_cgebak_work( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const float* scale, lapack_int m, lapack_complex_float* v, lapack_int ldv ); lapack_int LAPACKE_zgebak_work( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const double* scale, lapack_int m, lapack_complex_double* v, lapack_int ldv ); lapack_int LAPACKE_sgebal_work( int matrix_order, char job, lapack_int 
n, float* a, lapack_int lda, lapack_int* ilo, lapack_int* ihi, float* scale ); lapack_int LAPACKE_dgebal_work( int matrix_order, char job, lapack_int n, double* a, lapack_int lda, lapack_int* ilo, lapack_int* ihi, double* scale ); lapack_int LAPACKE_cgebal_work( int matrix_order, char job, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* ilo, lapack_int* ihi, float* scale ); lapack_int LAPACKE_zgebal_work( int matrix_order, char job, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* ilo, lapack_int* ihi, double* scale ); lapack_int LAPACKE_sgebrd_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* d, float* e, float* tauq, float* taup, float* work, lapack_int lwork ); lapack_int LAPACKE_dgebrd_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* d, double* e, double* tauq, double* taup, double* work, lapack_int lwork ); lapack_int LAPACKE_cgebrd_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, float* d, float* e, lapack_complex_float* tauq, lapack_complex_float* taup, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zgebrd_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, double* d, double* e, lapack_complex_double* tauq, lapack_complex_double* taup, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_sgecon_work( int matrix_order, char norm, lapack_int n, const float* a, lapack_int lda, float anorm, float* rcond, float* work, lapack_int* iwork ); lapack_int LAPACKE_dgecon_work( int matrix_order, char norm, lapack_int n, const double* a, lapack_int lda, double anorm, double* rcond, double* work, lapack_int* iwork ); lapack_int LAPACKE_cgecon_work( int matrix_order, char norm, lapack_int n, const lapack_complex_float* a, lapack_int lda, float anorm, float* rcond, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgecon_work( int 
matrix_order, char norm, lapack_int n, const lapack_complex_double* a, lapack_int lda, double anorm, double* rcond, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgeequ_work( int matrix_order, lapack_int m, lapack_int n, const float* a, lapack_int lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_dgeequ_work( int matrix_order, lapack_int m, lapack_int n, const double* a, lapack_int lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_cgeequ_work( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_zgeequ_work( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_sgeequb_work( int matrix_order, lapack_int m, lapack_int n, const float* a, lapack_int lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_dgeequb_work( int matrix_order, lapack_int m, lapack_int n, const double* a, lapack_int lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_cgeequb_work( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_zgeequb_work( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_sgees_work( int matrix_order, char jobvs, char sort, LAPACK_S_SELECT2 select, lapack_int n, float* a, lapack_int lda, lapack_int* sdim, float* wr, float* wi, float* vs, lapack_int ldvs, float* work, lapack_int lwork, lapack_logical* bwork ); lapack_int LAPACKE_dgees_work( int matrix_order, char jobvs, char sort, LAPACK_D_SELECT2 
select, lapack_int n, double* a, lapack_int lda, lapack_int* sdim, double* wr, double* wi, double* vs, lapack_int ldvs, double* work, lapack_int lwork, lapack_logical* bwork ); lapack_int LAPACKE_cgees_work( int matrix_order, char jobvs, char sort, LAPACK_C_SELECT1 select, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* sdim, lapack_complex_float* w, lapack_complex_float* vs, lapack_int ldvs, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_logical* bwork ); lapack_int LAPACKE_zgees_work( int matrix_order, char jobvs, char sort, LAPACK_Z_SELECT1 select, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* sdim, lapack_complex_double* w, lapack_complex_double* vs, lapack_int ldvs, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_logical* bwork ); lapack_int LAPACKE_sgeesx_work( int matrix_order, char jobvs, char sort, LAPACK_S_SELECT2 select, char sense, lapack_int n, float* a, lapack_int lda, lapack_int* sdim, float* wr, float* wi, float* vs, lapack_int ldvs, float* rconde, float* rcondv, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork, lapack_logical* bwork ); lapack_int LAPACKE_dgeesx_work( int matrix_order, char jobvs, char sort, LAPACK_D_SELECT2 select, char sense, lapack_int n, double* a, lapack_int lda, lapack_int* sdim, double* wr, double* wi, double* vs, lapack_int ldvs, double* rconde, double* rcondv, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork, lapack_logical* bwork ); lapack_int LAPACKE_cgeesx_work( int matrix_order, char jobvs, char sort, LAPACK_C_SELECT1 select, char sense, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* sdim, lapack_complex_float* w, lapack_complex_float* vs, lapack_int ldvs, float* rconde, float* rcondv, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_logical* bwork ); lapack_int LAPACKE_zgeesx_work( int matrix_order, char jobvs, char sort, LAPACK_Z_SELECT1 select, char sense, 
lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* sdim, lapack_complex_double* w, lapack_complex_double* vs, lapack_int ldvs, double* rconde, double* rcondv, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_logical* bwork ); lapack_int LAPACKE_sgeev_work( int matrix_order, char jobvl, char jobvr, lapack_int n, float* a, lapack_int lda, float* wr, float* wi, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, float* work, lapack_int lwork ); lapack_int LAPACKE_dgeev_work( int matrix_order, char jobvl, char jobvr, lapack_int n, double* a, lapack_int lda, double* wr, double* wi, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, double* work, lapack_int lwork ); lapack_int LAPACKE_cgeev_work( int matrix_order, char jobvl, char jobvr, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* w, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_complex_float* work, lapack_int lwork, float* rwork ); lapack_int LAPACKE_zgeev_work( int matrix_order, char jobvl, char jobvr, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* w, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_complex_double* work, lapack_int lwork, double* rwork ); lapack_int LAPACKE_sgeevx_work( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, float* a, lapack_int lda, float* wr, float* wi, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, float* scale, float* abnrm, float* rconde, float* rcondv, float* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_dgeevx_work( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, double* a, lapack_int lda, double* wr, double* wi, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, double* scale, double* abnrm, double* rconde, double* 
rcondv, double* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_cgeevx_work( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* w, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, float* scale, float* abnrm, float* rconde, float* rcondv, lapack_complex_float* work, lapack_int lwork, float* rwork ); lapack_int LAPACKE_zgeevx_work( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* w, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, double* scale, double* abnrm, double* rconde, double* rcondv, lapack_complex_double* work, lapack_int lwork, double* rwork ); lapack_int LAPACKE_sgehrd_work( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, float* a, lapack_int lda, float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dgehrd_work( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, double* a, lapack_int lda, double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_cgehrd_work( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zgehrd_work( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_sgejsv_work( int matrix_order, char joba, char jobu, char jobv, char jobr, char jobt, char jobp, lapack_int m, lapack_int n, float* a, lapack_int lda, float* sva, float* u, lapack_int ldu, float* v, lapack_int ldv, float* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_dgejsv_work( int 
matrix_order, char joba, char jobu, char jobv, char jobr, char jobt, char jobp, lapack_int m, lapack_int n, double* a, lapack_int lda, double* sva, double* u, lapack_int ldu, double* v, lapack_int ldv, double* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_sgelq2_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau, float* work ); lapack_int LAPACKE_dgelq2_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau, double* work ); lapack_int LAPACKE_cgelq2_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau, lapack_complex_float* work ); lapack_int LAPACKE_zgelq2_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau, lapack_complex_double* work ); lapack_int LAPACKE_sgelqf_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dgelqf_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_cgelqf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zgelqf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_sgels_work( int matrix_order, char trans, lapack_int m, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb, float* work, lapack_int lwork ); lapack_int LAPACKE_dgels_work( int matrix_order, char trans, lapack_int m, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb, double* work, lapack_int lwork ); lapack_int LAPACKE_cgels_work( int 
matrix_order, char trans, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zgels_work( int matrix_order, char trans, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_sgelsd_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb, float* s, float rcond, lapack_int* rank, float* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_dgelsd_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb, double* s, double rcond, lapack_int* rank, double* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_cgelsd_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float* s, float rcond, lapack_int* rank, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int* iwork ); lapack_int LAPACKE_zgelsd_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double* s, double rcond, lapack_int* rank, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int* iwork ); lapack_int LAPACKE_sgelss_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb, float* s, float rcond, lapack_int* rank, float* work, lapack_int lwork ); lapack_int LAPACKE_dgelss_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb, double* s, double rcond, lapack_int* rank, double* work, lapack_int lwork ); lapack_int 
LAPACKE_cgelss_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float* s, float rcond, lapack_int* rank, lapack_complex_float* work, lapack_int lwork, float* rwork ); lapack_int LAPACKE_zgelss_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double* s, double rcond, lapack_int* rank, lapack_complex_double* work, lapack_int lwork, double* rwork ); lapack_int LAPACKE_sgelsy_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb, lapack_int* jpvt, float rcond, lapack_int* rank, float* work, lapack_int lwork ); lapack_int LAPACKE_dgelsy_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb, lapack_int* jpvt, double rcond, lapack_int* rank, double* work, lapack_int lwork ); lapack_int LAPACKE_cgelsy_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_int* jpvt, float rcond, lapack_int* rank, lapack_complex_float* work, lapack_int lwork, float* rwork ); lapack_int LAPACKE_zgelsy_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_int* jpvt, double rcond, lapack_int* rank, lapack_complex_double* work, lapack_int lwork, double* rwork ); lapack_int LAPACKE_sgeqlf_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dgeqlf_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_cgeqlf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* 
/* NOTE(review): this region appears to be a verbatim, vendored chunk of the
 * LAPACKE C interface header (lapacke.h) -- prefer re-syncing with the
 * upstream LAPACKE release over editing these declarations by hand.
 *
 * All prototypes here are the "middle-level" *_work variants: unlike the
 * high-level LAPACKE_* routines, they take caller-supplied scratch buffers
 * (work / lwork, and where applicable rwork / iwork) instead of allocating
 * workspace internally.  Names follow the LAPACK convention: the leading
 * letter selects the precision (s = float, d = double, c = complex float,
 * z = complex double) and the stem names the operation, e.g. geqrf/geqlf/
 * geqp3 (QR/QL factorizations), gerfs(x) (iterative refinement of linear
 * solves), gesdd/gesvd (singular value decomposition), gesv/getrf/getri/
 * getrs (LU solve/factor/invert/back-substitute), gges(x)/ggev(x)
 * (generalized Schur form / eigenproblem), ggglm/gglse (constrained least
 * squares), ggsvd/ggsvp (generalized SVD), gtcon/gtrfs/gtsv(x)/gttrf/gttrs
 * (general tridiagonal systems), hbev (Hermitian banded eigenproblem).
 * (This comment sits inside a parameter list that began before this chunk;
 * C permits comments between any two tokens, so the declarations below are
 * unaffected.) */
a, lapack_int lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zgeqlf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_sgeqp3_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, lapack_int* jpvt, float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dgeqp3_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, lapack_int* jpvt, double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_cgeqp3_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* jpvt, lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork, float* rwork ); lapack_int LAPACKE_zgeqp3_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* jpvt, lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork, double* rwork ); lapack_int LAPACKE_sgeqpf_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, lapack_int* jpvt, float* tau, float* work ); lapack_int LAPACKE_dgeqpf_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, lapack_int* jpvt, double* tau, double* work ); lapack_int LAPACKE_cgeqpf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* jpvt, lapack_complex_float* tau, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgeqpf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* jpvt, lapack_complex_double* tau, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgeqr2_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau, float* work ); lapack_int LAPACKE_dgeqr2_work( int matrix_order, lapack_int m, 
lapack_int n, double* a, lapack_int lda, double* tau, double* work ); lapack_int LAPACKE_cgeqr2_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau, lapack_complex_float* work ); lapack_int LAPACKE_zgeqr2_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau, lapack_complex_double* work ); lapack_int LAPACKE_sgeqrf_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dgeqrf_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_cgeqrf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zgeqrf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_sgeqrfp_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dgeqrfp_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_cgeqrfp_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zgeqrfp_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_sgerfs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const lapack_int* ipiv, 
const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dgerfs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cgerfs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgerfs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgerfsx_work( int matrix_order, char trans, char equed, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const lapack_int* ipiv, const float* r, const float* c, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, float* work, lapack_int* iwork ); lapack_int LAPACKE_dgerfsx_work( int matrix_order, char trans, char equed, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const lapack_int* ipiv, const double* r, const double* c, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* 
err_bnds_comp, lapack_int nparams, double* params, double* work, lapack_int* iwork ); lapack_int LAPACKE_cgerfsx_work( int matrix_order, char trans, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const float* r, const float* c, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgerfsx_work( int matrix_order, char trans, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const double* r, const double* c, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgerqf_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dgerqf_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_cgerqf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zgerqf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_sgesdd_work( int matrix_order, char jobz, lapack_int m, lapack_int n, float* a, lapack_int lda, float* s, float* u, lapack_int ldu, float* vt, 
lapack_int ldvt, float* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_dgesdd_work( int matrix_order, char jobz, lapack_int m, lapack_int n, double* a, lapack_int lda, double* s, double* u, lapack_int ldu, double* vt, lapack_int ldvt, double* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_cgesdd_work( int matrix_order, char jobz, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* vt, lapack_int ldvt, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int* iwork ); lapack_int LAPACKE_zgesdd_work( int matrix_order, char jobz, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, double* s, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* vt, lapack_int ldvt, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int* iwork ); lapack_int LAPACKE_sgesv_work( int matrix_order, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dgesv_work( int matrix_order, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_cgesv_work( int matrix_order, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgesv_work( int matrix_order, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_dsgesv_work( int matrix_order, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, lapack_int* ipiv, double* b, lapack_int ldb, double* x, lapack_int ldx, double* work, float* swork, lapack_int* iter ); lapack_int LAPACKE_zcgesv_work( int matrix_order, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int 
ldb, lapack_complex_double* x, lapack_int ldx, lapack_complex_double* work, lapack_complex_float* swork, double* rwork, lapack_int* iter ); lapack_int LAPACKE_sgesvd_work( int matrix_order, char jobu, char jobvt, lapack_int m, lapack_int n, float* a, lapack_int lda, float* s, float* u, lapack_int ldu, float* vt, lapack_int ldvt, float* work, lapack_int lwork ); lapack_int LAPACKE_dgesvd_work( int matrix_order, char jobu, char jobvt, lapack_int m, lapack_int n, double* a, lapack_int lda, double* s, double* u, lapack_int ldu, double* vt, lapack_int ldvt, double* work, lapack_int lwork ); lapack_int LAPACKE_cgesvd_work( int matrix_order, char jobu, char jobvt, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* vt, lapack_int ldvt, lapack_complex_float* work, lapack_int lwork, float* rwork ); lapack_int LAPACKE_zgesvd_work( int matrix_order, char jobu, char jobvt, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, double* s, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* vt, lapack_int ldvt, lapack_complex_double* work, lapack_int lwork, double* rwork ); lapack_int LAPACKE_sgesvj_work( int matrix_order, char joba, char jobu, char jobv, lapack_int m, lapack_int n, float* a, lapack_int lda, float* sva, lapack_int mv, float* v, lapack_int ldv, float* work, lapack_int lwork ); lapack_int LAPACKE_dgesvj_work( int matrix_order, char joba, char jobu, char jobv, lapack_int m, lapack_int n, double* a, lapack_int lda, double* sva, lapack_int mv, double* v, lapack_int ldv, double* work, lapack_int lwork ); lapack_int LAPACKE_sgesvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int 
LAPACKE_dgesvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cgesvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgesvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgesvxx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, float* work, lapack_int* iwork ); lapack_int LAPACKE_dgesvxx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, 
double* params, double* work, lapack_int* iwork ); lapack_int LAPACKE_cgesvxx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgesvxx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgetf2_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_dgetf2_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_cgetf2_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_zgetf2_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_sgetrf_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_dgetrf_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_cgetrf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv 
); lapack_int LAPACKE_zgetrf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_sgetri_work( int matrix_order, lapack_int n, float* a, lapack_int lda, const lapack_int* ipiv, float* work, lapack_int lwork ); lapack_int LAPACKE_dgetri_work( int matrix_order, lapack_int n, double* a, lapack_int lda, const lapack_int* ipiv, double* work, lapack_int lwork ); lapack_int LAPACKE_cgetri_work( int matrix_order, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zgetri_work( int matrix_order, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_sgetrs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dgetrs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_cgetrs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgetrs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sggbak_work( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const float* lscale, const float* rscale, lapack_int m, float* v, lapack_int ldv ); lapack_int LAPACKE_dggbak_work( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const double* lscale, const double* rscale, lapack_int m, double* v, lapack_int ldv ); lapack_int 
LAPACKE_cggbak_work( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const float* lscale, const float* rscale, lapack_int m, lapack_complex_float* v, lapack_int ldv ); lapack_int LAPACKE_zggbak_work( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const double* lscale, const double* rscale, lapack_int m, lapack_complex_double* v, lapack_int ldv ); lapack_int LAPACKE_sggbal_work( int matrix_order, char job, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale, float* work ); lapack_int LAPACKE_dggbal_work( int matrix_order, char job, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale, double* work ); lapack_int LAPACKE_cggbal_work( int matrix_order, char job, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale, float* work ); lapack_int LAPACKE_zggbal_work( int matrix_order, char job, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale, double* work ); lapack_int LAPACKE_sgges_work( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_S_SELECT3 selctg, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, lapack_int* sdim, float* alphar, float* alphai, float* beta, float* vsl, lapack_int ldvsl, float* vsr, lapack_int ldvsr, float* work, lapack_int lwork, lapack_logical* bwork ); lapack_int LAPACKE_dgges_work( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_D_SELECT3 selctg, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, lapack_int* sdim, double* alphar, double* alphai, double* beta, double* vsl, lapack_int ldvsl, double* vsr, lapack_int ldvsr, double* work, lapack_int 
lwork, lapack_logical* bwork ); lapack_int LAPACKE_cgges_work( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_C_SELECT2 selctg, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_int* sdim, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vsl, lapack_int ldvsl, lapack_complex_float* vsr, lapack_int ldvsr, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_logical* bwork ); lapack_int LAPACKE_zgges_work( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_Z_SELECT2 selctg, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_int* sdim, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vsl, lapack_int ldvsl, lapack_complex_double* vsr, lapack_int ldvsr, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_logical* bwork ); lapack_int LAPACKE_sggesx_work( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_S_SELECT3 selctg, char sense, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, lapack_int* sdim, float* alphar, float* alphai, float* beta, float* vsl, lapack_int ldvsl, float* vsr, lapack_int ldvsr, float* rconde, float* rcondv, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork, lapack_logical* bwork ); lapack_int LAPACKE_dggesx_work( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_D_SELECT3 selctg, char sense, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, lapack_int* sdim, double* alphar, double* alphai, double* beta, double* vsl, lapack_int ldvsl, double* vsr, lapack_int ldvsr, double* rconde, double* rcondv, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork, lapack_logical* bwork ); lapack_int LAPACKE_cggesx_work( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_C_SELECT2 selctg, char sense, lapack_int n, lapack_complex_float* a, 
lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_int* sdim, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vsl, lapack_int ldvsl, lapack_complex_float* vsr, lapack_int ldvsr, float* rconde, float* rcondv, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int* iwork, lapack_int liwork, lapack_logical* bwork ); lapack_int LAPACKE_zggesx_work( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_Z_SELECT2 selctg, char sense, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_int* sdim, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vsl, lapack_int ldvsl, lapack_complex_double* vsr, lapack_int ldvsr, double* rconde, double* rcondv, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int* iwork, lapack_int liwork, lapack_logical* bwork ); lapack_int LAPACKE_sggev_work( int matrix_order, char jobvl, char jobvr, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* alphar, float* alphai, float* beta, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, float* work, lapack_int lwork ); lapack_int LAPACKE_dggev_work( int matrix_order, char jobvl, char jobvr, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* alphar, double* alphai, double* beta, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, double* work, lapack_int lwork ); lapack_int LAPACKE_cggev_work( int matrix_order, char jobvl, char jobvr, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_complex_float* work, lapack_int lwork, float* rwork ); lapack_int LAPACKE_zggev_work( int matrix_order, char jobvl, char jobvr, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* 
b, lapack_int ldb, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_complex_double* work, lapack_int lwork, double* rwork ); lapack_int LAPACKE_sggevx_work( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* alphar, float* alphai, float* beta, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale, float* abnrm, float* bbnrm, float* rconde, float* rcondv, float* work, lapack_int lwork, lapack_int* iwork, lapack_logical* bwork ); lapack_int LAPACKE_dggevx_work( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* alphar, double* alphai, double* beta, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale, double* abnrm, double* bbnrm, double* rconde, double* rcondv, double* work, lapack_int lwork, lapack_int* iwork, lapack_logical* bwork ); lapack_int LAPACKE_cggevx_work( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale, float* abnrm, float* bbnrm, float* rconde, float* rcondv, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int* iwork, lapack_logical* bwork ); lapack_int LAPACKE_zggevx_work( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* alpha, lapack_complex_double* beta, 
lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale, double* abnrm, double* bbnrm, double* rconde, double* rcondv, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int* iwork, lapack_logical* bwork ); lapack_int LAPACKE_sggglm_work( int matrix_order, lapack_int n, lapack_int m, lapack_int p, float* a, lapack_int lda, float* b, lapack_int ldb, float* d, float* x, float* y, float* work, lapack_int lwork ); lapack_int LAPACKE_dggglm_work( int matrix_order, lapack_int n, lapack_int m, lapack_int p, double* a, lapack_int lda, double* b, lapack_int ldb, double* d, double* x, double* y, double* work, lapack_int lwork ); lapack_int LAPACKE_cggglm_work( int matrix_order, lapack_int n, lapack_int m, lapack_int p, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* d, lapack_complex_float* x, lapack_complex_float* y, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zggglm_work( int matrix_order, lapack_int n, lapack_int m, lapack_int p, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* d, lapack_complex_double* x, lapack_complex_double* y, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_sgghrd_work( int matrix_order, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, float* a, lapack_int lda, float* b, lapack_int ldb, float* q, lapack_int ldq, float* z, lapack_int ldz ); lapack_int LAPACKE_dgghrd_work( int matrix_order, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, double* a, lapack_int lda, double* b, lapack_int ldb, double* q, lapack_int ldq, double* z, lapack_int ldz ); lapack_int LAPACKE_cgghrd_work( int matrix_order, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, 
lapack_complex_float* q, lapack_int ldq, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zgghrd_work( int matrix_order, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_sgglse_work( int matrix_order, lapack_int m, lapack_int n, lapack_int p, float* a, lapack_int lda, float* b, lapack_int ldb, float* c, float* d, float* x, float* work, lapack_int lwork ); lapack_int LAPACKE_dgglse_work( int matrix_order, lapack_int m, lapack_int n, lapack_int p, double* a, lapack_int lda, double* b, lapack_int ldb, double* c, double* d, double* x, double* work, lapack_int lwork ); lapack_int LAPACKE_cgglse_work( int matrix_order, lapack_int m, lapack_int n, lapack_int p, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* c, lapack_complex_float* d, lapack_complex_float* x, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zgglse_work( int matrix_order, lapack_int m, lapack_int n, lapack_int p, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* c, lapack_complex_double* d, lapack_complex_double* x, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_sggqrf_work( int matrix_order, lapack_int n, lapack_int m, lapack_int p, float* a, lapack_int lda, float* taua, float* b, lapack_int ldb, float* taub, float* work, lapack_int lwork ); lapack_int LAPACKE_dggqrf_work( int matrix_order, lapack_int n, lapack_int m, lapack_int p, double* a, lapack_int lda, double* taua, double* b, lapack_int ldb, double* taub, double* work, lapack_int lwork ); lapack_int LAPACKE_cggqrf_work( int matrix_order, lapack_int n, lapack_int m, lapack_int p, lapack_complex_float* a, lapack_int lda, lapack_complex_float* taua, lapack_complex_float* b, 
lapack_int ldb, lapack_complex_float* taub, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zggqrf_work( int matrix_order, lapack_int n, lapack_int m, lapack_int p, lapack_complex_double* a, lapack_int lda, lapack_complex_double* taua, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* taub, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_sggrqf_work( int matrix_order, lapack_int m, lapack_int p, lapack_int n, float* a, lapack_int lda, float* taua, float* b, lapack_int ldb, float* taub, float* work, lapack_int lwork ); lapack_int LAPACKE_dggrqf_work( int matrix_order, lapack_int m, lapack_int p, lapack_int n, double* a, lapack_int lda, double* taua, double* b, lapack_int ldb, double* taub, double* work, lapack_int lwork ); lapack_int LAPACKE_cggrqf_work( int matrix_order, lapack_int m, lapack_int p, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* taua, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* taub, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zggrqf_work( int matrix_order, lapack_int m, lapack_int p, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* taua, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* taub, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_sggsvd_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int n, lapack_int p, lapack_int* k, lapack_int* l, float* a, lapack_int lda, float* b, lapack_int ldb, float* alpha, float* beta, float* u, lapack_int ldu, float* v, lapack_int ldv, float* q, lapack_int ldq, float* work, lapack_int* iwork ); lapack_int LAPACKE_dggsvd_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int n, lapack_int p, lapack_int* k, lapack_int* l, double* a, lapack_int lda, double* b, lapack_int ldb, double* alpha, double* beta, double* u, lapack_int ldu, double* v, lapack_int ldv, double* q, 
lapack_int ldq, double* work, lapack_int* iwork ); lapack_int LAPACKE_cggsvd_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int n, lapack_int p, lapack_int* k, lapack_int* l, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float* alpha, float* beta, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* v, lapack_int ldv, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* work, float* rwork, lapack_int* iwork ); lapack_int LAPACKE_zggsvd_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int n, lapack_int p, lapack_int* k, lapack_int* l, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double* alpha, double* beta, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* v, lapack_int ldv, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* work, double* rwork, lapack_int* iwork ); lapack_int LAPACKE_sggsvp_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float tola, float tolb, lapack_int* k, lapack_int* l, float* u, lapack_int ldu, float* v, lapack_int ldv, float* q, lapack_int ldq, lapack_int* iwork, float* tau, float* work ); lapack_int LAPACKE_dggsvp_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double tola, double tolb, lapack_int* k, lapack_int* l, double* u, lapack_int ldu, double* v, lapack_int ldv, double* q, lapack_int ldq, lapack_int* iwork, double* tau, double* work ); lapack_int LAPACKE_cggsvp_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float tola, float tolb, lapack_int* k, lapack_int* l, lapack_complex_float* u, lapack_int ldu, 
lapack_complex_float* v, lapack_int ldv, lapack_complex_float* q, lapack_int ldq, lapack_int* iwork, float* rwork, lapack_complex_float* tau, lapack_complex_float* work ); lapack_int LAPACKE_zggsvp_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double tola, double tolb, lapack_int* k, lapack_int* l, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* v, lapack_int ldv, lapack_complex_double* q, lapack_int ldq, lapack_int* iwork, double* rwork, lapack_complex_double* tau, lapack_complex_double* work ); lapack_int LAPACKE_sgtcon_work( char norm, lapack_int n, const float* dl, const float* d, const float* du, const float* du2, const lapack_int* ipiv, float anorm, float* rcond, float* work, lapack_int* iwork ); lapack_int LAPACKE_dgtcon_work( char norm, lapack_int n, const double* dl, const double* d, const double* du, const double* du2, const lapack_int* ipiv, double anorm, double* rcond, double* work, lapack_int* iwork ); lapack_int LAPACKE_cgtcon_work( char norm, lapack_int n, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, const lapack_complex_float* du2, const lapack_int* ipiv, float anorm, float* rcond, lapack_complex_float* work ); lapack_int LAPACKE_zgtcon_work( char norm, lapack_int n, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, const lapack_complex_double* du2, const lapack_int* ipiv, double anorm, double* rcond, lapack_complex_double* work ); lapack_int LAPACKE_sgtrfs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const float* dl, const float* d, const float* du, const float* dlf, const float* df, const float* duf, const float* du2, const lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int 
LAPACKE_dgtrfs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const double* dl, const double* d, const double* du, const double* dlf, const double* df, const double* duf, const double* du2, const lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cgtrfs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, const lapack_complex_float* dlf, const lapack_complex_float* df, const lapack_complex_float* duf, const lapack_complex_float* du2, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgtrfs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, const lapack_complex_double* dlf, const lapack_complex_double* df, const lapack_complex_double* duf, const lapack_complex_double* du2, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgtsv_work( int matrix_order, lapack_int n, lapack_int nrhs, float* dl, float* d, float* du, float* b, lapack_int ldb ); lapack_int LAPACKE_dgtsv_work( int matrix_order, lapack_int n, lapack_int nrhs, double* dl, double* d, double* du, double* b, lapack_int ldb ); lapack_int LAPACKE_cgtsv_work( int matrix_order, lapack_int n, lapack_int nrhs, lapack_complex_float* dl, lapack_complex_float* d, lapack_complex_float* du, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgtsv_work( int matrix_order, lapack_int n, lapack_int nrhs, lapack_complex_double* dl, lapack_complex_double* d, 
lapack_complex_double* du, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sgtsvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, const float* dl, const float* d, const float* du, float* dlf, float* df, float* duf, float* du2, lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dgtsvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, const double* dl, const double* d, const double* du, double* dlf, double* df, double* duf, double* du2, lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cgtsvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, lapack_complex_float* dlf, lapack_complex_float* df, lapack_complex_float* duf, lapack_complex_float* du2, lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgtsvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, lapack_complex_double* dlf, lapack_complex_double* df, lapack_complex_double* duf, lapack_complex_double* du2, lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgttrf_work( lapack_int n, float* dl, float* d, float* du, float* du2, lapack_int* ipiv ); lapack_int LAPACKE_dgttrf_work( lapack_int n, double* dl, double* d, double* du, double* du2, 
lapack_int* ipiv ); lapack_int LAPACKE_cgttrf_work( lapack_int n, lapack_complex_float* dl, lapack_complex_float* d, lapack_complex_float* du, lapack_complex_float* du2, lapack_int* ipiv ); lapack_int LAPACKE_zgttrf_work( lapack_int n, lapack_complex_double* dl, lapack_complex_double* d, lapack_complex_double* du, lapack_complex_double* du2, lapack_int* ipiv ); lapack_int LAPACKE_sgttrs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const float* dl, const float* d, const float* du, const float* du2, const lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dgttrs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const double* dl, const double* d, const double* du, const double* du2, const lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_cgttrs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, const lapack_complex_float* du2, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgttrs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, const lapack_complex_double* du2, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_chbev_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, lapack_complex_float* ab, lapack_int ldab, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zhbev_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, lapack_complex_double* ab, lapack_int ldab, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_chbevd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, lapack_complex_float* ab, lapack_int 
ldab, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_zhbevd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, lapack_complex_double* ab, lapack_int ldab, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_chbevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int kd, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* q, lapack_int ldq, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, float* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_zhbevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int kd, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* q, lapack_int ldq, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, double* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_chbgst_work( int matrix_order, char vect, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_float* ab, lapack_int ldab, const lapack_complex_float* bb, lapack_int ldbb, lapack_complex_float* x, lapack_int ldx, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zhbgst_work( int matrix_order, char vect, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_double* ab, lapack_int ldab, const lapack_complex_double* bb, lapack_int ldbb, lapack_complex_double* x, lapack_int ldx, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_chbgv_work( int matrix_order, char jobz, char uplo, lapack_int n, 
lapack_int ka, lapack_int kb, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* bb, lapack_int ldbb, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zhbgv_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* bb, lapack_int ldbb, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_chbgvd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* bb, lapack_int ldbb, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_zhbgvd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* bb, lapack_int ldbb, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_chbgvx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* bb, lapack_int ldbb, lapack_complex_float* q, lapack_int ldq, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, float* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_zhbgvx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* bb, lapack_int ldbb, lapack_complex_double* q, lapack_int ldq, double vl, 
double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, double* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_chbtrd_work( int matrix_order, char vect, char uplo, lapack_int n, lapack_int kd, lapack_complex_float* ab, lapack_int ldab, float* d, float* e, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* work ); lapack_int LAPACKE_zhbtrd_work( int matrix_order, char vect, char uplo, lapack_int n, lapack_int kd, lapack_complex_double* ab, lapack_int ldab, double* d, double* e, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* work ); lapack_int LAPACKE_checon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, float anorm, float* rcond, lapack_complex_float* work ); lapack_int LAPACKE_zhecon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, double anorm, double* rcond, lapack_complex_double* work ); lapack_int LAPACKE_cheequb_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* s, float* scond, float* amax, lapack_complex_float* work ); lapack_int LAPACKE_zheequb_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* s, double* scond, double* amax, lapack_complex_double* work ); lapack_int LAPACKE_cheev_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, float* w, lapack_complex_float* work, lapack_int lwork, float* rwork ); lapack_int LAPACKE_zheev_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, double* w, lapack_complex_double* work, lapack_int lwork, double* rwork ); lapack_int LAPACKE_cheevd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_float* a, lapack_int 
lda, float* w, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_zheevd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, double* w, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_cheevr_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int* isuppz, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_zheevr_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int* isuppz, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_cheevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_zheevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_chegst_work( int 
matrix_order, lapack_int itype, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zhegst_work( int matrix_order, lapack_int itype, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_chegv_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float* w, lapack_complex_float* work, lapack_int lwork, float* rwork ); lapack_int LAPACKE_zhegv_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double* w, lapack_complex_double* work, lapack_int lwork, double* rwork ); lapack_int LAPACKE_chegvd_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float* w, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_zhegvd_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double* w, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_chegvx_work( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_zhegvx_work( int matrix_order, lapack_int itype, char jobz, char 
range, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_cherfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zherfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_cherfsx_work( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const float* s, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zherfsx_work( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const double* s, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* 
err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_chesv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zhesv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_chesvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, lapack_int lwork, float* rwork ); lapack_int LAPACKE_zhesvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, lapack_int lwork, double* rwork ); lapack_int LAPACKE_chesvxx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* s, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zhesvxx_work( int matrix_order, char fact, char uplo, lapack_int n, 
lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_chetrd_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, float* d, float* e, lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zhetrd_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, double* d, double* e, lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_chetrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zhetrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_chetri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* work ); lapack_int LAPACKE_zhetri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* work ); lapack_int LAPACKE_chetrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zhetrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int 
LAPACKE_chfrk_work( int matrix_order, char transr, char uplo, char trans, lapack_int n, lapack_int k, float alpha, const lapack_complex_float* a, lapack_int lda, float beta, lapack_complex_float* c ); lapack_int LAPACKE_zhfrk_work( int matrix_order, char transr, char uplo, char trans, lapack_int n, lapack_int k, double alpha, const lapack_complex_double* a, lapack_int lda, double beta, lapack_complex_double* c ); lapack_int LAPACKE_shgeqz_work( int matrix_order, char job, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, float* h, lapack_int ldh, float* t, lapack_int ldt, float* alphar, float* alphai, float* beta, float* q, lapack_int ldq, float* z, lapack_int ldz, float* work, lapack_int lwork ); lapack_int LAPACKE_dhgeqz_work( int matrix_order, char job, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, double* h, lapack_int ldh, double* t, lapack_int ldt, double* alphar, double* alphai, double* beta, double* q, lapack_int ldq, double* z, lapack_int ldz, double* work, lapack_int lwork ); lapack_int LAPACKE_chgeqz_work( int matrix_order, char job, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_float* h, lapack_int ldh, lapack_complex_float* t, lapack_int ldt, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, lapack_int lwork, float* rwork ); lapack_int LAPACKE_zhgeqz_work( int matrix_order, char job, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_double* h, lapack_int ldh, lapack_complex_double* t, lapack_int ldt, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, lapack_int lwork, double* rwork ); lapack_int LAPACKE_chpcon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, const 
lapack_int* ipiv, float anorm, float* rcond, lapack_complex_float* work ); lapack_int LAPACKE_zhpcon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, const lapack_int* ipiv, double anorm, double* rcond, lapack_complex_double* work ); lapack_int LAPACKE_chpev_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_float* ap, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zhpev_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_double* ap, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_chpevd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_float* ap, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_zhpevd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_double* ap, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_chpevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_float* ap, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, float* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_zhpevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_double* ap, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, double* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_chpgst_work( int matrix_order, lapack_int 
itype, char uplo, lapack_int n, lapack_complex_float* ap, const lapack_complex_float* bp ); lapack_int LAPACKE_zhpgst_work( int matrix_order, lapack_int itype, char uplo, lapack_int n, lapack_complex_double* ap, const lapack_complex_double* bp ); lapack_int LAPACKE_chpgv_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_float* ap, lapack_complex_float* bp, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zhpgv_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_double* ap, lapack_complex_double* bp, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_chpgvd_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_float* ap, lapack_complex_float* bp, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_zhpgvd_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_double* ap, lapack_complex_double* bp, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_chpgvx_work( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, lapack_complex_float* ap, lapack_complex_float* bp, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, float* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_zhpgvx_work( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, lapack_complex_double* ap, lapack_complex_double* bp, double vl, double vu, lapack_int il, 
lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, double* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_chprfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_complex_float* afp, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zhprfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_complex_double* afp, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_chpsv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* ap, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zhpsv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* ap, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_chpsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, lapack_complex_float* afp, lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zhpsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, lapack_complex_double* afp, lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_chptrd_work( int matrix_order, char uplo, 
lapack_int n, lapack_complex_float* ap, float* d, float* e, lapack_complex_float* tau ); lapack_int LAPACKE_zhptrd_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap, double* d, double* e, lapack_complex_double* tau ); lapack_int LAPACKE_chptrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap, lapack_int* ipiv ); lapack_int LAPACKE_zhptrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap, lapack_int* ipiv ); lapack_int LAPACKE_chptri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap, const lapack_int* ipiv, lapack_complex_float* work ); lapack_int LAPACKE_zhptri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap, const lapack_int* ipiv, lapack_complex_double* work ); lapack_int LAPACKE_chptrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zhptrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_shsein_work( int matrix_order, char job, char eigsrc, char initv, lapack_logical* select, lapack_int n, const float* h, lapack_int ldh, float* wr, const float* wi, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, float* work, lapack_int* ifaill, lapack_int* ifailr ); lapack_int LAPACKE_dhsein_work( int matrix_order, char job, char eigsrc, char initv, lapack_logical* select, lapack_int n, const double* h, lapack_int ldh, double* wr, const double* wi, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, double* work, lapack_int* ifaill, lapack_int* ifailr ); lapack_int LAPACKE_chsein_work( int matrix_order, char job, char eigsrc, char initv, const lapack_logical* select, lapack_int n, const lapack_complex_float* h, 
lapack_int ldh, lapack_complex_float* w, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, lapack_complex_float* work, float* rwork, lapack_int* ifaill, lapack_int* ifailr ); lapack_int LAPACKE_zhsein_work( int matrix_order, char job, char eigsrc, char initv, const lapack_logical* select, lapack_int n, const lapack_complex_double* h, lapack_int ldh, lapack_complex_double* w, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, lapack_complex_double* work, double* rwork, lapack_int* ifaill, lapack_int* ifailr ); lapack_int LAPACKE_shseqr_work( int matrix_order, char job, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, float* h, lapack_int ldh, float* wr, float* wi, float* z, lapack_int ldz, float* work, lapack_int lwork ); lapack_int LAPACKE_dhseqr_work( int matrix_order, char job, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, double* h, lapack_int ldh, double* wr, double* wi, double* z, lapack_int ldz, double* work, lapack_int lwork ); lapack_int LAPACKE_chseqr_work( int matrix_order, char job, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_float* h, lapack_int ldh, lapack_complex_float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zhseqr_work( int matrix_order, char job, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_double* h, lapack_int ldh, lapack_complex_double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_clacgv_work( lapack_int n, lapack_complex_float* x, lapack_int incx ); lapack_int LAPACKE_zlacgv_work( lapack_int n, lapack_complex_double* x, lapack_int incx ); lapack_int LAPACKE_slacn2_work( lapack_int n, float* v, float* x, lapack_int* isgn, float* est, lapack_int* kase, lapack_int* isave ); lapack_int 
LAPACKE_dlacn2_work( lapack_int n, double* v, double* x, lapack_int* isgn, double* est, lapack_int* kase, lapack_int* isave ); lapack_int LAPACKE_clacn2_work( lapack_int n, lapack_complex_float* v, lapack_complex_float* x, float* est, lapack_int* kase, lapack_int* isave ); lapack_int LAPACKE_zlacn2_work( lapack_int n, lapack_complex_double* v, lapack_complex_double* x, double* est, lapack_int* kase, lapack_int* isave ); lapack_int LAPACKE_slacpy_work( int matrix_order, char uplo, lapack_int m, lapack_int n, const float* a, lapack_int lda, float* b, lapack_int ldb ); lapack_int LAPACKE_dlacpy_work( int matrix_order, char uplo, lapack_int m, lapack_int n, const double* a, lapack_int lda, double* b, lapack_int ldb ); lapack_int LAPACKE_clacpy_work( int matrix_order, char uplo, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zlacpy_work( int matrix_order, char uplo, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_clacp2_work( int matrix_order, char uplo, lapack_int m, lapack_int n, const float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zlacp2_work( int matrix_order, char uplo, lapack_int m, lapack_int n, const double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_zlag2c_work( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, lapack_complex_float* sa, lapack_int ldsa ); lapack_int LAPACKE_slag2d_work( int matrix_order, lapack_int m, lapack_int n, const float* sa, lapack_int ldsa, double* a, lapack_int lda ); lapack_int LAPACKE_dlag2s_work( int matrix_order, lapack_int m, lapack_int n, const double* a, lapack_int lda, float* sa, lapack_int ldsa ); lapack_int LAPACKE_clag2z_work( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float* sa, lapack_int ldsa, 
lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_slagge_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const float* d, float* a, lapack_int lda, lapack_int* iseed, float* work ); lapack_int LAPACKE_dlagge_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const double* d, double* a, lapack_int lda, lapack_int* iseed, double* work ); lapack_int LAPACKE_clagge_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const float* d, lapack_complex_float* a, lapack_int lda, lapack_int* iseed, lapack_complex_float* work ); lapack_int LAPACKE_zlagge_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const double* d, lapack_complex_double* a, lapack_int lda, lapack_int* iseed, lapack_complex_double* work ); lapack_int LAPACKE_claghe_work( int matrix_order, lapack_int n, lapack_int k, const float* d, lapack_complex_float* a, lapack_int lda, lapack_int* iseed, lapack_complex_float* work ); lapack_int LAPACKE_zlaghe_work( int matrix_order, lapack_int n, lapack_int k, const double* d, lapack_complex_double* a, lapack_int lda, lapack_int* iseed, lapack_complex_double* work ); lapack_int LAPACKE_slagsy_work( int matrix_order, lapack_int n, lapack_int k, const float* d, float* a, lapack_int lda, lapack_int* iseed, float* work ); lapack_int LAPACKE_dlagsy_work( int matrix_order, lapack_int n, lapack_int k, const double* d, double* a, lapack_int lda, lapack_int* iseed, double* work ); lapack_int LAPACKE_clagsy_work( int matrix_order, lapack_int n, lapack_int k, const float* d, lapack_complex_float* a, lapack_int lda, lapack_int* iseed, lapack_complex_float* work ); lapack_int LAPACKE_zlagsy_work( int matrix_order, lapack_int n, lapack_int k, const double* d, lapack_complex_double* a, lapack_int lda, lapack_int* iseed, lapack_complex_double* work ); lapack_int LAPACKE_slapmr_work( int matrix_order, lapack_logical forwrd, lapack_int m, lapack_int n, 
float* x, lapack_int ldx, lapack_int* k ); lapack_int LAPACKE_dlapmr_work( int matrix_order, lapack_logical forwrd, lapack_int m, lapack_int n, double* x, lapack_int ldx, lapack_int* k ); lapack_int LAPACKE_clapmr_work( int matrix_order, lapack_logical forwrd, lapack_int m, lapack_int n, lapack_complex_float* x, lapack_int ldx, lapack_int* k ); lapack_int LAPACKE_zlapmr_work( int matrix_order, lapack_logical forwrd, lapack_int m, lapack_int n, lapack_complex_double* x, lapack_int ldx, lapack_int* k ); lapack_int LAPACKE_slartgp_work( float f, float g, float* cs, float* sn, float* r ); lapack_int LAPACKE_dlartgp_work( double f, double g, double* cs, double* sn, double* r ); lapack_int LAPACKE_slartgs_work( float x, float y, float sigma, float* cs, float* sn ); lapack_int LAPACKE_dlartgs_work( double x, double y, double sigma, double* cs, double* sn ); float LAPACKE_slapy2_work( float x, float y ); double LAPACKE_dlapy2_work( double x, double y ); float LAPACKE_slapy3_work( float x, float y, float z ); double LAPACKE_dlapy3_work( double x, double y, double z ); float LAPACKE_slamch_work( char cmach ); double LAPACKE_dlamch_work( char cmach ); float LAPACKE_slange_work( int matrix_order, char norm, lapack_int m, lapack_int n, const float* a, lapack_int lda, float* work ); double LAPACKE_dlange_work( int matrix_order, char norm, lapack_int m, lapack_int n, const double* a, lapack_int lda, double* work ); float LAPACKE_clange_work( int matrix_order, char norm, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* work ); double LAPACKE_zlange_work( int matrix_order, char norm, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* work ); float LAPACKE_clanhe_work( int matrix_order, char norm, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* work ); double LAPACKE_zlanhe_work( int matrix_order, char norm, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* 
work ); float LAPACKE_slansy_work( int matrix_order, char norm, char uplo, lapack_int n, const float* a, lapack_int lda, float* work ); double LAPACKE_dlansy_work( int matrix_order, char norm, char uplo, lapack_int n, const double* a, lapack_int lda, double* work ); float LAPACKE_clansy_work( int matrix_order, char norm, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* work ); double LAPACKE_zlansy_work( int matrix_order, char norm, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* work ); float LAPACKE_slantr_work( int matrix_order, char norm, char uplo, char diag, lapack_int m, lapack_int n, const float* a, lapack_int lda, float* work ); double LAPACKE_dlantr_work( int matrix_order, char norm, char uplo, char diag, lapack_int m, lapack_int n, const double* a, lapack_int lda, double* work ); float LAPACKE_clantr_work( int matrix_order, char norm, char uplo, char diag, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* work ); double LAPACKE_zlantr_work( int matrix_order, char norm, char uplo, char diag, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* work ); lapack_int LAPACKE_slarfb_work( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, const float* v, lapack_int ldv, const float* t, lapack_int ldt, float* c, lapack_int ldc, float* work, lapack_int ldwork ); lapack_int LAPACKE_dlarfb_work( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, const double* v, lapack_int ldv, const double* t, lapack_int ldt, double* c, lapack_int ldc, double* work, lapack_int ldwork ); lapack_int LAPACKE_clarfb_work( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* v, lapack_int ldv, const lapack_complex_float* t, lapack_int ldt, lapack_complex_float* c, 
lapack_int ldc, lapack_complex_float* work, lapack_int ldwork ); lapack_int LAPACKE_zlarfb_work( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* v, lapack_int ldv, const lapack_complex_double* t, lapack_int ldt, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work, lapack_int ldwork ); lapack_int LAPACKE_slarfg_work( lapack_int n, float* alpha, float* x, lapack_int incx, float* tau ); lapack_int LAPACKE_dlarfg_work( lapack_int n, double* alpha, double* x, lapack_int incx, double* tau ); lapack_int LAPACKE_clarfg_work( lapack_int n, lapack_complex_float* alpha, lapack_complex_float* x, lapack_int incx, lapack_complex_float* tau ); lapack_int LAPACKE_zlarfg_work( lapack_int n, lapack_complex_double* alpha, lapack_complex_double* x, lapack_int incx, lapack_complex_double* tau ); lapack_int LAPACKE_slarft_work( int matrix_order, char direct, char storev, lapack_int n, lapack_int k, const float* v, lapack_int ldv, const float* tau, float* t, lapack_int ldt ); lapack_int LAPACKE_dlarft_work( int matrix_order, char direct, char storev, lapack_int n, lapack_int k, const double* v, lapack_int ldv, const double* tau, double* t, lapack_int ldt ); lapack_int LAPACKE_clarft_work( int matrix_order, char direct, char storev, lapack_int n, lapack_int k, const lapack_complex_float* v, lapack_int ldv, const lapack_complex_float* tau, lapack_complex_float* t, lapack_int ldt ); lapack_int LAPACKE_zlarft_work( int matrix_order, char direct, char storev, lapack_int n, lapack_int k, const lapack_complex_double* v, lapack_int ldv, const lapack_complex_double* tau, lapack_complex_double* t, lapack_int ldt ); lapack_int LAPACKE_slarfx_work( int matrix_order, char side, lapack_int m, lapack_int n, const float* v, float tau, float* c, lapack_int ldc, float* work ); lapack_int LAPACKE_dlarfx_work( int matrix_order, char side, lapack_int m, lapack_int n, const double* v, double tau, 
double* c, lapack_int ldc, double* work ); lapack_int LAPACKE_clarfx_work( int matrix_order, char side, lapack_int m, lapack_int n, const lapack_complex_float* v, lapack_complex_float tau, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work ); lapack_int LAPACKE_zlarfx_work( int matrix_order, char side, lapack_int m, lapack_int n, const lapack_complex_double* v, lapack_complex_double tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work ); lapack_int LAPACKE_slarnv_work( lapack_int idist, lapack_int* iseed, lapack_int n, float* x ); lapack_int LAPACKE_dlarnv_work( lapack_int idist, lapack_int* iseed, lapack_int n, double* x ); lapack_int LAPACKE_clarnv_work( lapack_int idist, lapack_int* iseed, lapack_int n, lapack_complex_float* x ); lapack_int LAPACKE_zlarnv_work( lapack_int idist, lapack_int* iseed, lapack_int n, lapack_complex_double* x ); lapack_int LAPACKE_slaset_work( int matrix_order, char uplo, lapack_int m, lapack_int n, float alpha, float beta, float* a, lapack_int lda ); lapack_int LAPACKE_dlaset_work( int matrix_order, char uplo, lapack_int m, lapack_int n, double alpha, double beta, double* a, lapack_int lda ); lapack_int LAPACKE_claset_work( int matrix_order, char uplo, lapack_int m, lapack_int n, lapack_complex_float alpha, lapack_complex_float beta, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zlaset_work( int matrix_order, char uplo, lapack_int m, lapack_int n, lapack_complex_double alpha, lapack_complex_double beta, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_slasrt_work( char id, lapack_int n, float* d ); lapack_int LAPACKE_dlasrt_work( char id, lapack_int n, double* d ); lapack_int LAPACKE_slaswp_work( int matrix_order, lapack_int n, float* a, lapack_int lda, lapack_int k1, lapack_int k2, const lapack_int* ipiv, lapack_int incx ); lapack_int LAPACKE_dlaswp_work( int matrix_order, lapack_int n, double* a, lapack_int lda, lapack_int k1, lapack_int k2, const lapack_int* ipiv, 
lapack_int incx ); lapack_int LAPACKE_claswp_work( int matrix_order, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int k1, lapack_int k2, const lapack_int* ipiv, lapack_int incx ); lapack_int LAPACKE_zlaswp_work( int matrix_order, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int k1, lapack_int k2, const lapack_int* ipiv, lapack_int incx ); lapack_int LAPACKE_slatms_work( int matrix_order, lapack_int m, lapack_int n, char dist, lapack_int* iseed, char sym, float* d, lapack_int mode, float cond, float dmax, lapack_int kl, lapack_int ku, char pack, float* a, lapack_int lda, float* work ); lapack_int LAPACKE_dlatms_work( int matrix_order, lapack_int m, lapack_int n, char dist, lapack_int* iseed, char sym, double* d, lapack_int mode, double cond, double dmax, lapack_int kl, lapack_int ku, char pack, double* a, lapack_int lda, double* work ); lapack_int LAPACKE_clatms_work( int matrix_order, lapack_int m, lapack_int n, char dist, lapack_int* iseed, char sym, float* d, lapack_int mode, float cond, float dmax, lapack_int kl, lapack_int ku, char pack, lapack_complex_float* a, lapack_int lda, lapack_complex_float* work ); lapack_int LAPACKE_zlatms_work( int matrix_order, lapack_int m, lapack_int n, char dist, lapack_int* iseed, char sym, double* d, lapack_int mode, double cond, double dmax, lapack_int kl, lapack_int ku, char pack, lapack_complex_double* a, lapack_int lda, lapack_complex_double* work ); lapack_int LAPACKE_slauum_work( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda ); lapack_int LAPACKE_dlauum_work( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda ); lapack_int LAPACKE_clauum_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zlauum_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_sopgtr_work( int matrix_order, char uplo, lapack_int n, const float* ap, const float* 
tau, float* q, lapack_int ldq, float* work ); lapack_int LAPACKE_dopgtr_work( int matrix_order, char uplo, lapack_int n, const double* ap, const double* tau, double* q, lapack_int ldq, double* work ); lapack_int LAPACKE_sopmtr_work( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const float* ap, const float* tau, float* c, lapack_int ldc, float* work ); lapack_int LAPACKE_dopmtr_work( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const double* ap, const double* tau, double* c, lapack_int ldc, double* work ); lapack_int LAPACKE_sorgbr_work( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int k, float* a, lapack_int lda, const float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dorgbr_work( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int k, double* a, lapack_int lda, const double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_sorghr_work( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, float* a, lapack_int lda, const float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dorghr_work( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, double* a, lapack_int lda, const double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_sorglq_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, float* a, lapack_int lda, const float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dorglq_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, double* a, lapack_int lda, const double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_sorgql_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, float* a, lapack_int lda, const float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dorgql_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, double* a, lapack_int lda, const double* tau, double* work, lapack_int lwork ); lapack_int 
LAPACKE_sorgqr_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, float* a, lapack_int lda, const float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dorgqr_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, double* a, lapack_int lda, const double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_sorgrq_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, float* a, lapack_int lda, const float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dorgrq_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, double* a, lapack_int lda, const double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_sorgtr_work( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, const float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dorgtr_work( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, const double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_sormbr_work( int matrix_order, char vect, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc, float* work, lapack_int lwork ); lapack_int LAPACKE_dormbr_work( int matrix_order, char vect, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc, double* work, lapack_int lwork ); lapack_int LAPACKE_sormhr_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int ilo, lapack_int ihi, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc, float* work, lapack_int lwork ); lapack_int LAPACKE_dormhr_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int ilo, lapack_int ihi, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc, double* work, lapack_int lwork ); lapack_int LAPACKE_sormlq_work( int matrix_order, char 
side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc, float* work, lapack_int lwork ); lapack_int LAPACKE_dormlq_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc, double* work, lapack_int lwork ); lapack_int LAPACKE_sormql_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc, float* work, lapack_int lwork ); lapack_int LAPACKE_dormql_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc, double* work, lapack_int lwork ); lapack_int LAPACKE_sormqr_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc, float* work, lapack_int lwork ); lapack_int LAPACKE_dormqr_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc, double* work, lapack_int lwork ); lapack_int LAPACKE_sormrq_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc, float* work, lapack_int lwork ); lapack_int LAPACKE_dormrq_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc, double* work, lapack_int lwork ); lapack_int LAPACKE_sormrz_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc, float* work, lapack_int lwork ); 
lapack_int LAPACKE_dormrz_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc, double* work, lapack_int lwork ); lapack_int LAPACKE_sormtr_work( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc, float* work, lapack_int lwork ); lapack_int LAPACKE_dormtr_work( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc, double* work, lapack_int lwork ); lapack_int LAPACKE_spbcon_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, const float* ab, lapack_int ldab, float anorm, float* rcond, float* work, lapack_int* iwork ); lapack_int LAPACKE_dpbcon_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, const double* ab, lapack_int ldab, double anorm, double* rcond, double* work, lapack_int* iwork ); lapack_int LAPACKE_cpbcon_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_float* ab, lapack_int ldab, float anorm, float* rcond, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zpbcon_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_double* ab, lapack_int ldab, double anorm, double* rcond, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_spbequ_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, const float* ab, lapack_int ldab, float* s, float* scond, float* amax ); lapack_int LAPACKE_dpbequ_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, const double* ab, lapack_int ldab, double* s, double* scond, double* amax ); lapack_int LAPACKE_cpbequ_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_float* ab, lapack_int ldab, float* s, float* scond, float* amax ); 
lapack_int LAPACKE_zpbequ_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_double* ab, lapack_int ldab, double* s, double* scond, double* amax ); lapack_int LAPACKE_spbrfs_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const float* ab, lapack_int ldab, const float* afb, lapack_int ldafb, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dpbrfs_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const double* ab, lapack_int ldab, const double* afb, lapack_int ldafb, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cpbrfs_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, const lapack_complex_float* afb, lapack_int ldafb, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zpbrfs_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, const lapack_complex_double* afb, lapack_int ldafb, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_spbstf_work( int matrix_order, char uplo, lapack_int n, lapack_int kb, float* bb, lapack_int ldbb ); lapack_int LAPACKE_dpbstf_work( int matrix_order, char uplo, lapack_int n, lapack_int kb, double* bb, lapack_int ldbb ); lapack_int LAPACKE_cpbstf_work( int matrix_order, char uplo, lapack_int n, lapack_int kb, lapack_complex_float* bb, lapack_int ldbb ); lapack_int LAPACKE_zpbstf_work( int matrix_order, char uplo, lapack_int n, lapack_int kb, 
lapack_complex_double* bb, lapack_int ldbb ); lapack_int LAPACKE_spbsv_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, float* ab, lapack_int ldab, float* b, lapack_int ldb ); lapack_int LAPACKE_dpbsv_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, double* ab, lapack_int ldab, double* b, lapack_int ldb ); lapack_int LAPACKE_cpbsv_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zpbsv_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_spbsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, float* ab, lapack_int ldab, float* afb, lapack_int ldafb, char* equed, float* s, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dpbsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, double* ab, lapack_int ldab, double* afb, lapack_int ldafb, char* equed, double* s, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cpbsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* afb, lapack_int ldafb, char* equed, float* s, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zpbsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* 
afb, lapack_int ldafb, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_spbtrf_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, float* ab, lapack_int ldab ); lapack_int LAPACKE_dpbtrf_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, double* ab, lapack_int ldab ); lapack_int LAPACKE_cpbtrf_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_complex_float* ab, lapack_int ldab ); lapack_int LAPACKE_zpbtrf_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_complex_double* ab, lapack_int ldab ); lapack_int LAPACKE_spbtrs_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const float* ab, lapack_int ldab, float* b, lapack_int ldb ); lapack_int LAPACKE_dpbtrs_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const double* ab, lapack_int ldab, double* b, lapack_int ldb ); lapack_int LAPACKE_cpbtrs_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zpbtrs_work( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_spftrf_work( int matrix_order, char transr, char uplo, lapack_int n, float* a ); lapack_int LAPACKE_dpftrf_work( int matrix_order, char transr, char uplo, lapack_int n, double* a ); lapack_int LAPACKE_cpftrf_work( int matrix_order, char transr, char uplo, lapack_int n, lapack_complex_float* a ); lapack_int LAPACKE_zpftrf_work( int matrix_order, char transr, char uplo, lapack_int n, lapack_complex_double* a ); lapack_int LAPACKE_spftri_work( int matrix_order, char transr, char uplo, lapack_int n, float* 
a ); lapack_int LAPACKE_dpftri_work( int matrix_order, char transr, char uplo, lapack_int n, double* a ); lapack_int LAPACKE_cpftri_work( int matrix_order, char transr, char uplo, lapack_int n, lapack_complex_float* a ); lapack_int LAPACKE_zpftri_work( int matrix_order, char transr, char uplo, lapack_int n, lapack_complex_double* a ); lapack_int LAPACKE_spftrs_work( int matrix_order, char transr, char uplo, lapack_int n, lapack_int nrhs, const float* a, float* b, lapack_int ldb ); lapack_int LAPACKE_dpftrs_work( int matrix_order, char transr, char uplo, lapack_int n, lapack_int nrhs, const double* a, double* b, lapack_int ldb ); lapack_int LAPACKE_cpftrs_work( int matrix_order, char transr, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zpftrs_work( int matrix_order, char transr, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_spocon_work( int matrix_order, char uplo, lapack_int n, const float* a, lapack_int lda, float anorm, float* rcond, float* work, lapack_int* iwork ); lapack_int LAPACKE_dpocon_work( int matrix_order, char uplo, lapack_int n, const double* a, lapack_int lda, double anorm, double* rcond, double* work, lapack_int* iwork ); lapack_int LAPACKE_cpocon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, float anorm, float* rcond, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zpocon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, double anorm, double* rcond, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_spoequ_work( int matrix_order, lapack_int n, const float* a, lapack_int lda, float* s, float* scond, float* amax ); lapack_int LAPACKE_dpoequ_work( int matrix_order, lapack_int n, const double* a, lapack_int lda, double* s, double* scond, double* amax 
); lapack_int LAPACKE_cpoequ_work( int matrix_order, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* s, float* scond, float* amax ); lapack_int LAPACKE_zpoequ_work( int matrix_order, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* s, double* scond, double* amax ); lapack_int LAPACKE_spoequb_work( int matrix_order, lapack_int n, const float* a, lapack_int lda, float* s, float* scond, float* amax ); lapack_int LAPACKE_dpoequb_work( int matrix_order, lapack_int n, const double* a, lapack_int lda, double* s, double* scond, double* amax ); lapack_int LAPACKE_cpoequb_work( int matrix_order, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* s, float* scond, float* amax ); lapack_int LAPACKE_zpoequb_work( int matrix_order, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* s, double* scond, double* amax ); lapack_int LAPACKE_sporfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dporfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cporfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zporfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const 
lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sporfsx_work( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const float* s, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, float* work, lapack_int* iwork ); lapack_int LAPACKE_dporfsx_work( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const double* s, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, double* work, lapack_int* iwork ); lapack_int LAPACKE_cporfsx_work( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const float* s, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zporfsx_work( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const double* s, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sposv_work( int 
matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb ); lapack_int LAPACKE_dposv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb ); lapack_int LAPACKE_cposv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zposv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_dsposv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb, double* x, lapack_int ldx, double* work, float* swork, lapack_int* iter ); lapack_int LAPACKE_zcposv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, lapack_complex_double* work, lapack_complex_float* swork, double* rwork, lapack_int* iter ); lapack_int LAPACKE_sposvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* af, lapack_int ldaf, char* equed, float* s, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dposvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* af, lapack_int ldaf, char* equed, double* s, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cposvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, char* equed, float* s, lapack_complex_float* b, 
lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zposvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sposvxx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* af, lapack_int ldaf, char* equed, float* s, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, float* work, lapack_int* iwork ); lapack_int LAPACKE_dposvxx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* af, lapack_int ldaf, char* equed, double* s, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, double* work, lapack_int* iwork ); lapack_int LAPACKE_cposvxx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, char* equed, float* s, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zposvxx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, 
lapack_complex_double* af, lapack_int ldaf, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_spotrf_work( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda ); lapack_int LAPACKE_dpotrf_work( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda ); lapack_int LAPACKE_cpotrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zpotrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_spotri_work( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda ); lapack_int LAPACKE_dpotri_work( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda ); lapack_int LAPACKE_cpotri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zpotri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_spotrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, float* b, lapack_int ldb ); lapack_int LAPACKE_dpotrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, double* b, lapack_int ldb ); lapack_int LAPACKE_cpotrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zpotrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sppcon_work( int matrix_order, char uplo, lapack_int n, const float* ap, 
float anorm, float* rcond, float* work, lapack_int* iwork ); lapack_int LAPACKE_dppcon_work( int matrix_order, char uplo, lapack_int n, const double* ap, double anorm, double* rcond, double* work, lapack_int* iwork ); lapack_int LAPACKE_cppcon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, float anorm, float* rcond, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zppcon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, double anorm, double* rcond, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sppequ_work( int matrix_order, char uplo, lapack_int n, const float* ap, float* s, float* scond, float* amax ); lapack_int LAPACKE_dppequ_work( int matrix_order, char uplo, lapack_int n, const double* ap, double* s, double* scond, double* amax ); lapack_int LAPACKE_cppequ_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, float* s, float* scond, float* amax ); lapack_int LAPACKE_zppequ_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, double* s, double* scond, double* amax ); lapack_int LAPACKE_spprfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* ap, const float* afp, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dpprfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* ap, const double* afp, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cpprfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_complex_float* afp, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zpprfs_work( int 
matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_complex_double* afp, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sppsv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* ap, float* b, lapack_int ldb ); lapack_int LAPACKE_dppsv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* ap, double* b, lapack_int ldb ); lapack_int LAPACKE_cppsv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* ap, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zppsv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* ap, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sppsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, float* ap, float* afp, char* equed, float* s, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dppsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, double* ap, double* afp, char* equed, double* s, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cppsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* ap, lapack_complex_float* afp, char* equed, float* s, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zppsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* ap, lapack_complex_double* afp, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, 
lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_spptrf_work( int matrix_order, char uplo, lapack_int n, float* ap ); lapack_int LAPACKE_dpptrf_work( int matrix_order, char uplo, lapack_int n, double* ap ); lapack_int LAPACKE_cpptrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap ); lapack_int LAPACKE_zpptrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap ); lapack_int LAPACKE_spptri_work( int matrix_order, char uplo, lapack_int n, float* ap ); lapack_int LAPACKE_dpptri_work( int matrix_order, char uplo, lapack_int n, double* ap ); lapack_int LAPACKE_cpptri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap ); lapack_int LAPACKE_zpptri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap ); lapack_int LAPACKE_spptrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* ap, float* b, lapack_int ldb ); lapack_int LAPACKE_dpptrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* ap, double* b, lapack_int ldb ); lapack_int LAPACKE_cpptrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zpptrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_spstrf_work( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, lapack_int* piv, lapack_int* rank, float tol, float* work ); lapack_int LAPACKE_dpstrf_work( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, lapack_int* piv, lapack_int* rank, double tol, double* work ); lapack_int LAPACKE_cpstrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* piv, lapack_int* rank, float tol, 
float* work ); lapack_int LAPACKE_zpstrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* piv, lapack_int* rank, double tol, double* work ); lapack_int LAPACKE_sptcon_work( lapack_int n, const float* d, const float* e, float anorm, float* rcond, float* work ); lapack_int LAPACKE_dptcon_work( lapack_int n, const double* d, const double* e, double anorm, double* rcond, double* work ); lapack_int LAPACKE_cptcon_work( lapack_int n, const float* d, const lapack_complex_float* e, float anorm, float* rcond, float* work ); lapack_int LAPACKE_zptcon_work( lapack_int n, const double* d, const lapack_complex_double* e, double anorm, double* rcond, double* work ); lapack_int LAPACKE_spteqr_work( int matrix_order, char compz, lapack_int n, float* d, float* e, float* z, lapack_int ldz, float* work ); lapack_int LAPACKE_dpteqr_work( int matrix_order, char compz, lapack_int n, double* d, double* e, double* z, lapack_int ldz, double* work ); lapack_int LAPACKE_cpteqr_work( int matrix_order, char compz, lapack_int n, float* d, float* e, lapack_complex_float* z, lapack_int ldz, float* work ); lapack_int LAPACKE_zpteqr_work( int matrix_order, char compz, lapack_int n, double* d, double* e, lapack_complex_double* z, lapack_int ldz, double* work ); lapack_int LAPACKE_sptrfs_work( int matrix_order, lapack_int n, lapack_int nrhs, const float* d, const float* e, const float* df, const float* ef, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr, float* work ); lapack_int LAPACKE_dptrfs_work( int matrix_order, lapack_int n, lapack_int nrhs, const double* d, const double* e, const double* df, const double* ef, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr, double* work ); lapack_int LAPACKE_cptrfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* d, const lapack_complex_float* e, const float* df, const lapack_complex_float* ef, const 
lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zptrfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* d, const lapack_complex_double* e, const double* df, const lapack_complex_double* ef, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sptsv_work( int matrix_order, lapack_int n, lapack_int nrhs, float* d, float* e, float* b, lapack_int ldb ); lapack_int LAPACKE_dptsv_work( int matrix_order, lapack_int n, lapack_int nrhs, double* d, double* e, double* b, lapack_int ldb ); lapack_int LAPACKE_cptsv_work( int matrix_order, lapack_int n, lapack_int nrhs, float* d, lapack_complex_float* e, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zptsv_work( int matrix_order, lapack_int n, lapack_int nrhs, double* d, lapack_complex_double* e, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sptsvx_work( int matrix_order, char fact, lapack_int n, lapack_int nrhs, const float* d, const float* e, float* df, float* ef, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* work ); lapack_int LAPACKE_dptsvx_work( int matrix_order, char fact, lapack_int n, lapack_int nrhs, const double* d, const double* e, double* df, double* ef, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* work ); lapack_int LAPACKE_cptsvx_work( int matrix_order, char fact, lapack_int n, lapack_int nrhs, const float* d, const lapack_complex_float* e, float* df, lapack_complex_float* ef, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zptsvx_work( int 
matrix_order, char fact, lapack_int n, lapack_int nrhs, const double* d, const lapack_complex_double* e, double* df, lapack_complex_double* ef, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_spttrf_work( lapack_int n, float* d, float* e ); lapack_int LAPACKE_dpttrf_work( lapack_int n, double* d, double* e ); lapack_int LAPACKE_cpttrf_work( lapack_int n, float* d, lapack_complex_float* e ); lapack_int LAPACKE_zpttrf_work( lapack_int n, double* d, lapack_complex_double* e ); lapack_int LAPACKE_spttrs_work( int matrix_order, lapack_int n, lapack_int nrhs, const float* d, const float* e, float* b, lapack_int ldb ); lapack_int LAPACKE_dpttrs_work( int matrix_order, lapack_int n, lapack_int nrhs, const double* d, const double* e, double* b, lapack_int ldb ); lapack_int LAPACKE_cpttrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* d, const lapack_complex_float* e, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zpttrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* d, const lapack_complex_double* e, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_ssbev_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, float* ab, lapack_int ldab, float* w, float* z, lapack_int ldz, float* work ); lapack_int LAPACKE_dsbev_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, double* ab, lapack_int ldab, double* w, double* z, lapack_int ldz, double* work ); lapack_int LAPACKE_ssbevd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, float* ab, lapack_int ldab, float* w, float* z, lapack_int ldz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_dsbevd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, double* ab, 
lapack_int ldab, double* w, double* z, lapack_int ldz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_ssbevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int kd, float* ab, lapack_int ldab, float* q, lapack_int ldq, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, float* work, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_dsbevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int kd, double* ab, lapack_int ldab, double* q, lapack_int ldq, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, double* work, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_ssbgst_work( int matrix_order, char vect, char uplo, lapack_int n, lapack_int ka, lapack_int kb, float* ab, lapack_int ldab, const float* bb, lapack_int ldbb, float* x, lapack_int ldx, float* work ); lapack_int LAPACKE_dsbgst_work( int matrix_order, char vect, char uplo, lapack_int n, lapack_int ka, lapack_int kb, double* ab, lapack_int ldab, const double* bb, lapack_int ldbb, double* x, lapack_int ldx, double* work ); lapack_int LAPACKE_ssbgv_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, float* ab, lapack_int ldab, float* bb, lapack_int ldbb, float* w, float* z, lapack_int ldz, float* work ); lapack_int LAPACKE_dsbgv_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, double* ab, lapack_int ldab, double* bb, lapack_int ldbb, double* w, double* z, lapack_int ldz, double* work ); lapack_int LAPACKE_ssbgvd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, float* ab, lapack_int ldab, float* bb, lapack_int ldbb, float* w, float* z, lapack_int ldz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int 
LAPACKE_dsbgvd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, double* ab, lapack_int ldab, double* bb, lapack_int ldbb, double* w, double* z, lapack_int ldz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_ssbgvx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int ka, lapack_int kb, float* ab, lapack_int ldab, float* bb, lapack_int ldbb, float* q, lapack_int ldq, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, float* work, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_dsbgvx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int ka, lapack_int kb, double* ab, lapack_int ldab, double* bb, lapack_int ldbb, double* q, lapack_int ldq, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, double* work, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_ssbtrd_work( int matrix_order, char vect, char uplo, lapack_int n, lapack_int kd, float* ab, lapack_int ldab, float* d, float* e, float* q, lapack_int ldq, float* work ); lapack_int LAPACKE_dsbtrd_work( int matrix_order, char vect, char uplo, lapack_int n, lapack_int kd, double* ab, lapack_int ldab, double* d, double* e, double* q, lapack_int ldq, double* work ); lapack_int LAPACKE_ssfrk_work( int matrix_order, char transr, char uplo, char trans, lapack_int n, lapack_int k, float alpha, const float* a, lapack_int lda, float beta, float* c ); lapack_int LAPACKE_dsfrk_work( int matrix_order, char transr, char uplo, char trans, lapack_int n, lapack_int k, double alpha, const double* a, lapack_int lda, double beta, double* c ); lapack_int LAPACKE_sspcon_work( int matrix_order, char uplo, lapack_int n, const float* ap, const lapack_int* ipiv, float anorm, float* rcond, float* work, lapack_int* iwork ); lapack_int LAPACKE_dspcon_work( 
int matrix_order, char uplo, lapack_int n, const double* ap, const lapack_int* ipiv, double anorm, double* rcond, double* work, lapack_int* iwork ); lapack_int LAPACKE_cspcon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, const lapack_int* ipiv, float anorm, float* rcond, lapack_complex_float* work ); lapack_int LAPACKE_zspcon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, const lapack_int* ipiv, double anorm, double* rcond, lapack_complex_double* work ); lapack_int LAPACKE_sspev_work( int matrix_order, char jobz, char uplo, lapack_int n, float* ap, float* w, float* z, lapack_int ldz, float* work ); lapack_int LAPACKE_dspev_work( int matrix_order, char jobz, char uplo, lapack_int n, double* ap, double* w, double* z, lapack_int ldz, double* work ); lapack_int LAPACKE_sspevd_work( int matrix_order, char jobz, char uplo, lapack_int n, float* ap, float* w, float* z, lapack_int ldz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_dspevd_work( int matrix_order, char jobz, char uplo, lapack_int n, double* ap, double* w, double* z, lapack_int ldz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_sspevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, float* ap, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, float* work, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_dspevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, double* ap, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, double* work, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_sspgst_work( int matrix_order, lapack_int itype, char uplo, lapack_int n, float* ap, const float* bp ); lapack_int LAPACKE_dspgst_work( int matrix_order, lapack_int itype, char 
uplo, lapack_int n, double* ap, const double* bp ); lapack_int LAPACKE_sspgv_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, float* ap, float* bp, float* w, float* z, lapack_int ldz, float* work ); lapack_int LAPACKE_dspgv_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, double* ap, double* bp, double* w, double* z, lapack_int ldz, double* work ); lapack_int LAPACKE_sspgvd_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, float* ap, float* bp, float* w, float* z, lapack_int ldz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_dspgvd_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, double* ap, double* bp, double* w, double* z, lapack_int ldz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_sspgvx_work( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, float* ap, float* bp, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, float* work, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_dspgvx_work( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, double* ap, double* bp, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, double* work, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_ssprfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* ap, const float* afp, const lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dsprfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* ap, const double* afp, const lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, 
double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_csprfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_complex_float* afp, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zsprfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_complex_double* afp, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sspsv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* ap, lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dspsv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* ap, lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_cspsv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* ap, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zspsv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* ap, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sspsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const float* ap, float* afp, lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dspsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const double* ap, double* afp, lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cspsvx_work( int 
matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, lapack_complex_float* afp, lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zspsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, lapack_complex_double* afp, lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_ssptrd_work( int matrix_order, char uplo, lapack_int n, float* ap, float* d, float* e, float* tau ); lapack_int LAPACKE_dsptrd_work( int matrix_order, char uplo, lapack_int n, double* ap, double* d, double* e, double* tau ); lapack_int LAPACKE_ssptrf_work( int matrix_order, char uplo, lapack_int n, float* ap, lapack_int* ipiv ); lapack_int LAPACKE_dsptrf_work( int matrix_order, char uplo, lapack_int n, double* ap, lapack_int* ipiv ); lapack_int LAPACKE_csptrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap, lapack_int* ipiv ); lapack_int LAPACKE_zsptrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap, lapack_int* ipiv ); lapack_int LAPACKE_ssptri_work( int matrix_order, char uplo, lapack_int n, float* ap, const lapack_int* ipiv, float* work ); lapack_int LAPACKE_dsptri_work( int matrix_order, char uplo, lapack_int n, double* ap, const lapack_int* ipiv, double* work ); lapack_int LAPACKE_csptri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap, const lapack_int* ipiv, lapack_complex_float* work ); lapack_int LAPACKE_zsptri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap, const lapack_int* ipiv, lapack_complex_double* work ); lapack_int LAPACKE_ssptrs_work( int matrix_order, char 
uplo, lapack_int n, lapack_int nrhs, const float* ap, const lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dsptrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* ap, const lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_csptrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zsptrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sstebz_work( char range, char order, lapack_int n, float vl, float vu, lapack_int il, lapack_int iu, float abstol, const float* d, const float* e, lapack_int* m, lapack_int* nsplit, float* w, lapack_int* iblock, lapack_int* isplit, float* work, lapack_int* iwork ); lapack_int LAPACKE_dstebz_work( char range, char order, lapack_int n, double vl, double vu, lapack_int il, lapack_int iu, double abstol, const double* d, const double* e, lapack_int* m, lapack_int* nsplit, double* w, lapack_int* iblock, lapack_int* isplit, double* work, lapack_int* iwork ); lapack_int LAPACKE_sstedc_work( int matrix_order, char compz, lapack_int n, float* d, float* e, float* z, lapack_int ldz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_dstedc_work( int matrix_order, char compz, lapack_int n, double* d, double* e, double* z, lapack_int ldz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_cstedc_work( int matrix_order, char compz, lapack_int n, float* d, float* e, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_zstedc_work( int matrix_order, char compz, lapack_int n, double* d, double* e, lapack_complex_double* z, 
lapack_int ldz, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_sstegr_work( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* isuppz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_dstegr_work( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* isuppz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_cstegr_work( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int* isuppz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_zstegr_work( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int* isuppz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_sstein_work( int matrix_order, lapack_int n, const float* d, const float* e, lapack_int m, const float* w, const lapack_int* iblock, const lapack_int* isplit, float* z, lapack_int ldz, float* work, lapack_int* iwork, lapack_int* ifailv ); lapack_int LAPACKE_dstein_work( int matrix_order, lapack_int n, const double* d, const double* e, lapack_int m, const double* w, const lapack_int* iblock, const lapack_int* isplit, double* z, lapack_int ldz, double* work, lapack_int* iwork, lapack_int* ifailv ); lapack_int LAPACKE_cstein_work( int 
matrix_order, lapack_int n, const float* d, const float* e, lapack_int m, const float* w, const lapack_int* iblock, const lapack_int* isplit, lapack_complex_float* z, lapack_int ldz, float* work, lapack_int* iwork, lapack_int* ifailv ); lapack_int LAPACKE_zstein_work( int matrix_order, lapack_int n, const double* d, const double* e, lapack_int m, const double* w, const lapack_int* iblock, const lapack_int* isplit, lapack_complex_double* z, lapack_int ldz, double* work, lapack_int* iwork, lapack_int* ifailv ); lapack_int LAPACKE_sstemr_work( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int nzc, lapack_int* isuppz, lapack_logical* tryrac, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_dstemr_work( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int nzc, lapack_int* isuppz, lapack_logical* tryrac, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_cstemr_work( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int nzc, lapack_int* isuppz, lapack_logical* tryrac, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_zstemr_work( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int nzc, lapack_int* isuppz, lapack_logical* tryrac, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_ssteqr_work( int matrix_order, char compz, lapack_int n, float* 
d, float* e, float* z, lapack_int ldz, float* work ); lapack_int LAPACKE_dsteqr_work( int matrix_order, char compz, lapack_int n, double* d, double* e, double* z, lapack_int ldz, double* work ); lapack_int LAPACKE_csteqr_work( int matrix_order, char compz, lapack_int n, float* d, float* e, lapack_complex_float* z, lapack_int ldz, float* work ); lapack_int LAPACKE_zsteqr_work( int matrix_order, char compz, lapack_int n, double* d, double* e, lapack_complex_double* z, lapack_int ldz, double* work ); lapack_int LAPACKE_ssterf_work( lapack_int n, float* d, float* e ); lapack_int LAPACKE_dsterf_work( lapack_int n, double* d, double* e ); lapack_int LAPACKE_sstev_work( int matrix_order, char jobz, lapack_int n, float* d, float* e, float* z, lapack_int ldz, float* work ); lapack_int LAPACKE_dstev_work( int matrix_order, char jobz, lapack_int n, double* d, double* e, double* z, lapack_int ldz, double* work ); lapack_int LAPACKE_sstevd_work( int matrix_order, char jobz, lapack_int n, float* d, float* e, float* z, lapack_int ldz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_dstevd_work( int matrix_order, char jobz, lapack_int n, double* d, double* e, double* z, lapack_int ldz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_sstevr_work( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* isuppz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_dstevr_work( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* isuppz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_sstevx_work( int matrix_order, char jobz, 
char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, float* work, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_dstevx_work( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, double* work, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_ssycon_work( int matrix_order, char uplo, lapack_int n, const float* a, lapack_int lda, const lapack_int* ipiv, float anorm, float* rcond, float* work, lapack_int* iwork ); lapack_int LAPACKE_dsycon_work( int matrix_order, char uplo, lapack_int n, const double* a, lapack_int lda, const lapack_int* ipiv, double anorm, double* rcond, double* work, lapack_int* iwork ); lapack_int LAPACKE_csycon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, float anorm, float* rcond, lapack_complex_float* work ); lapack_int LAPACKE_zsycon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, double anorm, double* rcond, lapack_complex_double* work ); lapack_int LAPACKE_ssyequb_work( int matrix_order, char uplo, lapack_int n, const float* a, lapack_int lda, float* s, float* scond, float* amax, float* work ); lapack_int LAPACKE_dsyequb_work( int matrix_order, char uplo, lapack_int n, const double* a, lapack_int lda, double* s, double* scond, double* amax, double* work ); lapack_int LAPACKE_csyequb_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* s, float* scond, float* amax, lapack_complex_float* work ); lapack_int LAPACKE_zsyequb_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* s, double* scond, double* amax, lapack_complex_double* 
work ); lapack_int LAPACKE_ssyev_work( int matrix_order, char jobz, char uplo, lapack_int n, float* a, lapack_int lda, float* w, float* work, lapack_int lwork ); lapack_int LAPACKE_dsyev_work( int matrix_order, char jobz, char uplo, lapack_int n, double* a, lapack_int lda, double* w, double* work, lapack_int lwork ); lapack_int LAPACKE_ssyevd_work( int matrix_order, char jobz, char uplo, lapack_int n, float* a, lapack_int lda, float* w, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_dsyevd_work( int matrix_order, char jobz, char uplo, lapack_int n, double* a, lapack_int lda, double* w, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_ssyevr_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, float* a, lapack_int lda, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* isuppz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_dsyevr_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, double* a, lapack_int lda, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* isuppz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_ssyevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, float* a, lapack_int lda, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_dsyevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, double* a, lapack_int lda, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int* ifail 
); lapack_int LAPACKE_ssygst_work( int matrix_order, lapack_int itype, char uplo, lapack_int n, float* a, lapack_int lda, const float* b, lapack_int ldb ); lapack_int LAPACKE_dsygst_work( int matrix_order, lapack_int itype, char uplo, lapack_int n, double* a, lapack_int lda, const double* b, lapack_int ldb ); lapack_int LAPACKE_ssygv_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* w, float* work, lapack_int lwork ); lapack_int LAPACKE_dsygv_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* w, double* work, lapack_int lwork ); lapack_int LAPACKE_ssygvd_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* w, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_dsygvd_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* w, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_ssygvx_work( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_dsygvx_work( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_ssyrfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, 
const float* a, lapack_int lda, const float* af, lapack_int ldaf, const lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dsyrfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_csyrfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zsyrfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_ssyrfsx_work( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const lapack_int* ipiv, const float* s, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, float* work, lapack_int* iwork ); lapack_int LAPACKE_dsyrfsx_work( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const lapack_int* ipiv, const double* s, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* berr, 
lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, double* work, lapack_int* iwork ); lapack_int LAPACKE_csyrfsx_work( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const float* s, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zsyrfsx_work( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const double* s, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_ssysv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, lapack_int* ipiv, float* b, lapack_int ldb, float* work, lapack_int lwork ); lapack_int LAPACKE_dsysv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, lapack_int* ipiv, double* b, lapack_int ldb, double* work, lapack_int lwork ); lapack_int LAPACKE_csysv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zsysv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb, 
lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_ssysv_rook_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, lapack_int* ipiv, float* b, lapack_int ldb, float* work, lapack_int lwork ); lapack_int LAPACKE_dsysv_rook_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, lapack_int* ipiv, double* b, lapack_int ldb, double* work, lapack_int lwork ); lapack_int LAPACKE_csysv_rook_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zsysv_rook_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_ssysvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, float* af, lapack_int ldaf, lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_dsysvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, double* af, lapack_int ldaf, lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_csysvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, lapack_int lwork, 
float* rwork ); lapack_int LAPACKE_zsysvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, lapack_int lwork, double* rwork ); lapack_int LAPACKE_ssysvxx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* s, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, float* work, lapack_int* iwork ); lapack_int LAPACKE_dsysvxx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* s, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, double* work, lapack_int* iwork ); lapack_int LAPACKE_csysvxx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* s, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zsysvxx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* 
ipiv, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_ssytrd_work( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, float* d, float* e, float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dsytrd_work( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, double* d, double* e, double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_ssytrf_work( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, lapack_int* ipiv, float* work, lapack_int lwork ); lapack_int LAPACKE_dsytrf_work( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, lapack_int* ipiv, double* work, lapack_int lwork ); lapack_int LAPACKE_csytrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zsytrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_ssytri_work( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, const lapack_int* ipiv, float* work ); lapack_int LAPACKE_dsytri_work( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, const lapack_int* ipiv, double* work ); lapack_int LAPACKE_csytri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* work ); lapack_int LAPACKE_zsytri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* work ); lapack_int LAPACKE_ssytrs_work( int matrix_order, char 
uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dsytrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_csytrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zsytrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_stbcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, lapack_int kd, const float* ab, lapack_int ldab, float* rcond, float* work, lapack_int* iwork ); lapack_int LAPACKE_dtbcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, lapack_int kd, const double* ab, lapack_int ldab, double* rcond, double* work, lapack_int* iwork ); lapack_int LAPACKE_ctbcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, lapack_int kd, const lapack_complex_float* ab, lapack_int ldab, float* rcond, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_ztbcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, lapack_int kd, const lapack_complex_double* ab, lapack_int ldab, double* rcond, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_stbrfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const float* ab, lapack_int ldab, const float* b, lapack_int ldb, const float* x, lapack_int ldx, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dtbrfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const double* ab, lapack_int ldab, 
const double* b, lapack_int ldb, const double* x, lapack_int ldx, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_ctbrfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, const lapack_complex_float* b, lapack_int ldb, const lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_ztbrfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, const lapack_complex_double* b, lapack_int ldb, const lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_stbtrs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const float* ab, lapack_int ldab, float* b, lapack_int ldb ); lapack_int LAPACKE_dtbtrs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const double* ab, lapack_int ldab, double* b, lapack_int ldb ); lapack_int LAPACKE_ctbtrs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_ztbtrs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_stfsm_work( int matrix_order, char transr, char side, char uplo, char trans, char diag, lapack_int m, lapack_int n, float alpha, const float* a, float* b, lapack_int ldb ); lapack_int LAPACKE_dtfsm_work( int matrix_order, char transr, char side, char uplo, char trans, char diag, lapack_int m, lapack_int n, double alpha, 
const double* a, double* b, lapack_int ldb ); lapack_int LAPACKE_ctfsm_work( int matrix_order, char transr, char side, char uplo, char trans, char diag, lapack_int m, lapack_int n, lapack_complex_float alpha, const lapack_complex_float* a, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_ztfsm_work( int matrix_order, char transr, char side, char uplo, char trans, char diag, lapack_int m, lapack_int n, lapack_complex_double alpha, const lapack_complex_double* a, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_stftri_work( int matrix_order, char transr, char uplo, char diag, lapack_int n, float* a ); lapack_int LAPACKE_dtftri_work( int matrix_order, char transr, char uplo, char diag, lapack_int n, double* a ); lapack_int LAPACKE_ctftri_work( int matrix_order, char transr, char uplo, char diag, lapack_int n, lapack_complex_float* a ); lapack_int LAPACKE_ztftri_work( int matrix_order, char transr, char uplo, char diag, lapack_int n, lapack_complex_double* a ); lapack_int LAPACKE_stfttp_work( int matrix_order, char transr, char uplo, lapack_int n, const float* arf, float* ap ); lapack_int LAPACKE_dtfttp_work( int matrix_order, char transr, char uplo, lapack_int n, const double* arf, double* ap ); lapack_int LAPACKE_ctfttp_work( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_float* arf, lapack_complex_float* ap ); lapack_int LAPACKE_ztfttp_work( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_double* arf, lapack_complex_double* ap ); lapack_int LAPACKE_stfttr_work( int matrix_order, char transr, char uplo, lapack_int n, const float* arf, float* a, lapack_int lda ); lapack_int LAPACKE_dtfttr_work( int matrix_order, char transr, char uplo, lapack_int n, const double* arf, double* a, lapack_int lda ); lapack_int LAPACKE_ctfttr_work( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_float* arf, lapack_complex_float* a, lapack_int lda ); lapack_int 
LAPACKE_ztfttr_work( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_double* arf, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_stgevc_work( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, const float* s, lapack_int lds, const float* p, lapack_int ldp, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, float* work ); lapack_int LAPACKE_dtgevc_work( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, const double* s, lapack_int lds, const double* p, lapack_int ldp, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, double* work ); lapack_int LAPACKE_ctgevc_work( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_float* s, lapack_int lds, const lapack_complex_float* p, lapack_int ldp, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_ztgevc_work( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_double* s, lapack_int lds, const lapack_complex_double* p, lapack_int ldp, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_stgexc_work( int matrix_order, lapack_logical wantq, lapack_logical wantz, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* q, lapack_int ldq, float* z, lapack_int ldz, lapack_int* ifst, lapack_int* ilst, float* work, lapack_int lwork ); lapack_int LAPACKE_dtgexc_work( int matrix_order, lapack_logical wantq, lapack_logical wantz, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* q, lapack_int ldq, double* z, lapack_int ldz, lapack_int* ifst, 
lapack_int* ilst, double* work, lapack_int lwork ); lapack_int LAPACKE_ctgexc_work( int matrix_order, lapack_logical wantq, lapack_logical wantz, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* z, lapack_int ldz, lapack_int ifst, lapack_int ilst ); lapack_int LAPACKE_ztgexc_work( int matrix_order, lapack_logical wantq, lapack_logical wantz, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* z, lapack_int ldz, lapack_int ifst, lapack_int ilst ); lapack_int LAPACKE_stgsen_work( int matrix_order, lapack_int ijob, lapack_logical wantq, lapack_logical wantz, const lapack_logical* select, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* alphar, float* alphai, float* beta, float* q, lapack_int ldq, float* z, lapack_int ldz, lapack_int* m, float* pl, float* pr, float* dif, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_dtgsen_work( int matrix_order, lapack_int ijob, lapack_logical wantq, lapack_logical wantz, const lapack_logical* select, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* alphar, double* alphai, double* beta, double* q, lapack_int ldq, double* z, lapack_int ldz, lapack_int* m, double* pl, double* pr, double* dif, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_ctgsen_work( int matrix_order, lapack_int ijob, lapack_logical wantq, lapack_logical wantz, const lapack_logical* select, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* z, lapack_int ldz, lapack_int* m, float* pl, float* pr, float* dif, lapack_complex_float* work, lapack_int lwork, 
lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_ztgsen_work( int matrix_order, lapack_int ijob, lapack_logical wantq, lapack_logical wantz, const lapack_logical* select, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* z, lapack_int ldz, lapack_int* m, double* pl, double* pr, double* dif, lapack_complex_double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_stgsja_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_int k, lapack_int l, float* a, lapack_int lda, float* b, lapack_int ldb, float tola, float tolb, float* alpha, float* beta, float* u, lapack_int ldu, float* v, lapack_int ldv, float* q, lapack_int ldq, float* work, lapack_int* ncycle ); lapack_int LAPACKE_dtgsja_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_int k, lapack_int l, double* a, lapack_int lda, double* b, lapack_int ldb, double tola, double tolb, double* alpha, double* beta, double* u, lapack_int ldu, double* v, lapack_int ldv, double* q, lapack_int ldq, double* work, lapack_int* ncycle ); lapack_int LAPACKE_ctgsja_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_int k, lapack_int l, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float tola, float tolb, float* alpha, float* beta, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* v, lapack_int ldv, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* work, lapack_int* ncycle ); lapack_int LAPACKE_ztgsja_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_int k, lapack_int l, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int 
ldb, double tola, double tolb, double* alpha, double* beta, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* v, lapack_int ldv, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* work, lapack_int* ncycle ); lapack_int LAPACKE_stgsna_work( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const float* a, lapack_int lda, const float* b, lapack_int ldb, const float* vl, lapack_int ldvl, const float* vr, lapack_int ldvr, float* s, float* dif, lapack_int mm, lapack_int* m, float* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_dtgsna_work( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const double* a, lapack_int lda, const double* b, lapack_int ldb, const double* vl, lapack_int ldvl, const double* vr, lapack_int ldvr, double* s, double* dif, lapack_int mm, lapack_int* m, double* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_ctgsna_work( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* b, lapack_int ldb, const lapack_complex_float* vl, lapack_int ldvl, const lapack_complex_float* vr, lapack_int ldvr, float* s, float* dif, lapack_int mm, lapack_int* m, lapack_complex_float* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_ztgsna_work( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* b, lapack_int ldb, const lapack_complex_double* vl, lapack_int ldvl, const lapack_complex_double* vr, lapack_int ldvr, double* s, double* dif, lapack_int mm, lapack_int* m, lapack_complex_double* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_stgsyl_work( int matrix_order, char trans, lapack_int ijob, lapack_int m, lapack_int n, const float* a, lapack_int lda, const float* b, lapack_int ldb, float* c, 
lapack_int ldc, const float* d, lapack_int ldd, const float* e, lapack_int lde, float* f, lapack_int ldf, float* scale, float* dif, float* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_dtgsyl_work( int matrix_order, char trans, lapack_int ijob, lapack_int m, lapack_int n, const double* a, lapack_int lda, const double* b, lapack_int ldb, double* c, lapack_int ldc, const double* d, lapack_int ldd, const double* e, lapack_int lde, double* f, lapack_int ldf, double* scale, double* dif, double* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_ctgsyl_work( int matrix_order, char trans, lapack_int ijob, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* c, lapack_int ldc, const lapack_complex_float* d, lapack_int ldd, const lapack_complex_float* e, lapack_int lde, lapack_complex_float* f, lapack_int ldf, float* scale, float* dif, lapack_complex_float* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_ztgsyl_work( int matrix_order, char trans, lapack_int ijob, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* c, lapack_int ldc, const lapack_complex_double* d, lapack_int ldd, const lapack_complex_double* e, lapack_int lde, lapack_complex_double* f, lapack_int ldf, double* scale, double* dif, lapack_complex_double* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_stpcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, const float* ap, float* rcond, float* work, lapack_int* iwork ); lapack_int LAPACKE_dtpcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, const double* ap, double* rcond, double* work, lapack_int* iwork ); lapack_int LAPACKE_ctpcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, const lapack_complex_float* ap, float* rcond, lapack_complex_float* 
work, float* rwork ); lapack_int LAPACKE_ztpcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, const lapack_complex_double* ap, double* rcond, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_stprfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const float* ap, const float* b, lapack_int ldb, const float* x, lapack_int ldx, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dtprfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const double* ap, const double* b, lapack_int ldb, const double* x, lapack_int ldx, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_ctprfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_complex_float* b, lapack_int ldb, const lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_ztprfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_complex_double* b, lapack_int ldb, const lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_stptri_work( int matrix_order, char uplo, char diag, lapack_int n, float* ap ); lapack_int LAPACKE_dtptri_work( int matrix_order, char uplo, char diag, lapack_int n, double* ap ); lapack_int LAPACKE_ctptri_work( int matrix_order, char uplo, char diag, lapack_int n, lapack_complex_float* ap ); lapack_int LAPACKE_ztptri_work( int matrix_order, char uplo, char diag, lapack_int n, lapack_complex_double* ap ); lapack_int LAPACKE_stptrs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const float* ap, float* b, lapack_int ldb ); lapack_int LAPACKE_dtptrs_work( int matrix_order, 
char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const double* ap, double* b, lapack_int ldb ); lapack_int LAPACKE_ctptrs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_ztptrs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_stpttf_work( int matrix_order, char transr, char uplo, lapack_int n, const float* ap, float* arf ); lapack_int LAPACKE_dtpttf_work( int matrix_order, char transr, char uplo, lapack_int n, const double* ap, double* arf ); lapack_int LAPACKE_ctpttf_work( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_float* ap, lapack_complex_float* arf ); lapack_int LAPACKE_ztpttf_work( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_double* ap, lapack_complex_double* arf ); lapack_int LAPACKE_stpttr_work( int matrix_order, char uplo, lapack_int n, const float* ap, float* a, lapack_int lda ); lapack_int LAPACKE_dtpttr_work( int matrix_order, char uplo, lapack_int n, const double* ap, double* a, lapack_int lda ); lapack_int LAPACKE_ctpttr_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_ztpttr_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_strcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, const float* a, lapack_int lda, float* rcond, float* work, lapack_int* iwork ); lapack_int LAPACKE_dtrcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, const double* a, lapack_int lda, double* rcond, double* work, lapack_int* iwork ); lapack_int LAPACKE_ctrcon_work( int matrix_order, char norm, 
char uplo, char diag, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* rcond, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_ztrcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* rcond, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_strevc_work( int matrix_order, char side, char howmny, lapack_logical* select, lapack_int n, const float* t, lapack_int ldt, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, float* work ); lapack_int LAPACKE_dtrevc_work( int matrix_order, char side, char howmny, lapack_logical* select, lapack_int n, const double* t, lapack_int ldt, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, double* work ); lapack_int LAPACKE_ctrevc_work( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, lapack_complex_float* t, lapack_int ldt, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_ztrevc_work( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, lapack_complex_double* t, lapack_int ldt, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_strexc_work( int matrix_order, char compq, lapack_int n, float* t, lapack_int ldt, float* q, lapack_int ldq, lapack_int* ifst, lapack_int* ilst, float* work ); lapack_int LAPACKE_dtrexc_work( int matrix_order, char compq, lapack_int n, double* t, lapack_int ldt, double* q, lapack_int ldq, lapack_int* ifst, lapack_int* ilst, double* work ); lapack_int LAPACKE_ctrexc_work( int matrix_order, char compq, lapack_int n, lapack_complex_float* t, lapack_int ldt, lapack_complex_float* q, lapack_int 
ldq, lapack_int ifst, lapack_int ilst ); lapack_int LAPACKE_ztrexc_work( int matrix_order, char compq, lapack_int n, lapack_complex_double* t, lapack_int ldt, lapack_complex_double* q, lapack_int ldq, lapack_int ifst, lapack_int ilst ); lapack_int LAPACKE_strrfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* b, lapack_int ldb, const float* x, lapack_int ldx, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dtrrfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* b, lapack_int ldb, const double* x, lapack_int ldx, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_ctrrfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* b, lapack_int ldb, const lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_ztrrfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* b, lapack_int ldb, const lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_strsen_work( int matrix_order, char job, char compq, const lapack_logical* select, lapack_int n, float* t, lapack_int ldt, float* q, lapack_int ldq, float* wr, float* wi, lapack_int* m, float* s, float* sep, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_dtrsen_work( int matrix_order, char job, char compq, const lapack_logical* select, lapack_int n, double* t, lapack_int ldt, double* q, lapack_int ldq, double* wr, double* wi, lapack_int* m, double* s, double* sep, double* work, lapack_int 
lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_ctrsen_work( int matrix_order, char job, char compq, const lapack_logical* select, lapack_int n, lapack_complex_float* t, lapack_int ldt, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* w, lapack_int* m, float* s, float* sep, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_ztrsen_work( int matrix_order, char job, char compq, const lapack_logical* select, lapack_int n, lapack_complex_double* t, lapack_int ldt, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* w, lapack_int* m, double* s, double* sep, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_strsna_work( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const float* t, lapack_int ldt, const float* vl, lapack_int ldvl, const float* vr, lapack_int ldvr, float* s, float* sep, lapack_int mm, lapack_int* m, float* work, lapack_int ldwork, lapack_int* iwork ); lapack_int LAPACKE_dtrsna_work( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const double* t, lapack_int ldt, const double* vl, lapack_int ldvl, const double* vr, lapack_int ldvr, double* s, double* sep, lapack_int mm, lapack_int* m, double* work, lapack_int ldwork, lapack_int* iwork ); lapack_int LAPACKE_ctrsna_work( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_float* t, lapack_int ldt, const lapack_complex_float* vl, lapack_int ldvl, const lapack_complex_float* vr, lapack_int ldvr, float* s, float* sep, lapack_int mm, lapack_int* m, lapack_complex_float* work, lapack_int ldwork, float* rwork ); lapack_int LAPACKE_ztrsna_work( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_double* t, lapack_int ldt, const lapack_complex_double* vl, lapack_int ldvl, const lapack_complex_double* vr, lapack_int ldvr, double* s, double* sep, lapack_int mm, 
lapack_int* m, lapack_complex_double* work, lapack_int ldwork, double* rwork ); lapack_int LAPACKE_strsyl_work( int matrix_order, char trana, char tranb, lapack_int isgn, lapack_int m, lapack_int n, const float* a, lapack_int lda, const float* b, lapack_int ldb, float* c, lapack_int ldc, float* scale ); lapack_int LAPACKE_dtrsyl_work( int matrix_order, char trana, char tranb, lapack_int isgn, lapack_int m, lapack_int n, const double* a, lapack_int lda, const double* b, lapack_int ldb, double* c, lapack_int ldc, double* scale ); lapack_int LAPACKE_ctrsyl_work( int matrix_order, char trana, char tranb, lapack_int isgn, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* c, lapack_int ldc, float* scale ); lapack_int LAPACKE_ztrsyl_work( int matrix_order, char trana, char tranb, lapack_int isgn, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* c, lapack_int ldc, double* scale ); lapack_int LAPACKE_strtri_work( int matrix_order, char uplo, char diag, lapack_int n, float* a, lapack_int lda ); lapack_int LAPACKE_dtrtri_work( int matrix_order, char uplo, char diag, lapack_int n, double* a, lapack_int lda ); lapack_int LAPACKE_ctrtri_work( int matrix_order, char uplo, char diag, lapack_int n, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_ztrtri_work( int matrix_order, char uplo, char diag, lapack_int n, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_strtrs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, float* b, lapack_int ldb ); lapack_int LAPACKE_dtrtrs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, double* b, lapack_int ldb ); lapack_int LAPACKE_ctrtrs_work( int matrix_order, char uplo, char trans, char 
diag, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_ztrtrs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_strttf_work( int matrix_order, char transr, char uplo, lapack_int n, const float* a, lapack_int lda, float* arf ); lapack_int LAPACKE_dtrttf_work( int matrix_order, char transr, char uplo, lapack_int n, const double* a, lapack_int lda, double* arf ); lapack_int LAPACKE_ctrttf_work( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* arf ); lapack_int LAPACKE_ztrttf_work( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* arf ); lapack_int LAPACKE_strttp_work( int matrix_order, char uplo, lapack_int n, const float* a, lapack_int lda, float* ap ); lapack_int LAPACKE_dtrttp_work( int matrix_order, char uplo, lapack_int n, const double* a, lapack_int lda, double* ap ); lapack_int LAPACKE_ctrttp_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* ap ); lapack_int LAPACKE_ztrttp_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* ap ); lapack_int LAPACKE_stzrzf_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dtzrzf_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_ctzrzf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int 
LAPACKE_ztzrzf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cungbr_work( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int k, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zungbr_work( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int k, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cunghr_work( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zunghr_work( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cunglq_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zunglq_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cungql_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zungql_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cungqr_work( int matrix_order, 
lapack_int m, lapack_int n, lapack_int k, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zungqr_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cungrq_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zungrq_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cungtr_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zungtr_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cunmbr_work( int matrix_order, char vect, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zunmbr_work( int matrix_order, char vect, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cunmhr_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int ilo, lapack_int ihi, const lapack_complex_float* a, lapack_int lda, const 
lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zunmhr_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int ilo, lapack_int ihi, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cunmlq_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zunmlq_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cunmql_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zunmql_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cunmqr_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zunmqr_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, 
lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cunmrq_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zunmrq_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cunmrz_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zunmrz_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cunmtr_work( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zunmtr_work( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cupgtr_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, const 
lapack_complex_float* tau, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* work ); lapack_int LAPACKE_zupgtr_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, const lapack_complex_double* tau, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* work ); lapack_int LAPACKE_cupmtr_work( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const lapack_complex_float* ap, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work ); lapack_int LAPACKE_zupmtr_work( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const lapack_complex_double* ap, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work ); lapack_int LAPACKE_claghe( int matrix_order, lapack_int n, lapack_int k, const float* d, lapack_complex_float* a, lapack_int lda, lapack_int* iseed ); lapack_int LAPACKE_zlaghe( int matrix_order, lapack_int n, lapack_int k, const double* d, lapack_complex_double* a, lapack_int lda, lapack_int* iseed ); lapack_int LAPACKE_slagsy( int matrix_order, lapack_int n, lapack_int k, const float* d, float* a, lapack_int lda, lapack_int* iseed ); lapack_int LAPACKE_dlagsy( int matrix_order, lapack_int n, lapack_int k, const double* d, double* a, lapack_int lda, lapack_int* iseed ); lapack_int LAPACKE_clagsy( int matrix_order, lapack_int n, lapack_int k, const float* d, lapack_complex_float* a, lapack_int lda, lapack_int* iseed ); lapack_int LAPACKE_zlagsy( int matrix_order, lapack_int n, lapack_int k, const double* d, lapack_complex_double* a, lapack_int lda, lapack_int* iseed ); lapack_int LAPACKE_slapmr( int matrix_order, lapack_logical forwrd, lapack_int m, lapack_int n, float* x, lapack_int ldx, lapack_int* k ); lapack_int LAPACKE_dlapmr( int matrix_order, lapack_logical forwrd, lapack_int m, lapack_int n, double* x, lapack_int ldx, lapack_int* k ); lapack_int 
LAPACKE_clapmr( int matrix_order, lapack_logical forwrd, lapack_int m, lapack_int n, lapack_complex_float* x, lapack_int ldx, lapack_int* k ); lapack_int LAPACKE_zlapmr( int matrix_order, lapack_logical forwrd, lapack_int m, lapack_int n, lapack_complex_double* x, lapack_int ldx, lapack_int* k ); float LAPACKE_slapy2( float x, float y ); double LAPACKE_dlapy2( double x, double y ); float LAPACKE_slapy3( float x, float y, float z ); double LAPACKE_dlapy3( double x, double y, double z ); lapack_int LAPACKE_slartgp( float f, float g, float* cs, float* sn, float* r ); lapack_int LAPACKE_dlartgp( double f, double g, double* cs, double* sn, double* r ); lapack_int LAPACKE_slartgs( float x, float y, float sigma, float* cs, float* sn ); lapack_int LAPACKE_dlartgs( double x, double y, double sigma, double* cs, double* sn ); //LAPACK 3.3.0 lapack_int LAPACKE_cbbcsd( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, lapack_int m, lapack_int p, lapack_int q, float* theta, float* phi, lapack_complex_float* u1, lapack_int ldu1, lapack_complex_float* u2, lapack_int ldu2, lapack_complex_float* v1t, lapack_int ldv1t, lapack_complex_float* v2t, lapack_int ldv2t, float* b11d, float* b11e, float* b12d, float* b12e, float* b21d, float* b21e, float* b22d, float* b22e ); lapack_int LAPACKE_cbbcsd_work( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, lapack_int m, lapack_int p, lapack_int q, float* theta, float* phi, lapack_complex_float* u1, lapack_int ldu1, lapack_complex_float* u2, lapack_int ldu2, lapack_complex_float* v1t, lapack_int ldv1t, lapack_complex_float* v2t, lapack_int ldv2t, float* b11d, float* b11e, float* b12d, float* b12e, float* b21d, float* b21e, float* b22d, float* b22e, float* rwork, lapack_int lrwork ); lapack_int LAPACKE_cheswapr( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int i1, lapack_int i2 ); lapack_int LAPACKE_cheswapr_work( int matrix_order, char uplo, lapack_int n, 
lapack_complex_float* a, lapack_int i1, lapack_int i2 ); lapack_int LAPACKE_chetri2( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_chetri2_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_chetri2x( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_int nb ); lapack_int LAPACKE_chetri2x_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int nb ); lapack_int LAPACKE_chetrs2( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_chetrs2_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* work ); lapack_int LAPACKE_csyconv( int matrix_order, char uplo, char way, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_csyconv_work( int matrix_order, char uplo, char way, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* work ); lapack_int LAPACKE_csyswapr( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int i1, lapack_int i2 ); lapack_int LAPACKE_csyswapr_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int i1, lapack_int i2 ); lapack_int LAPACKE_csytri2( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_csytri2_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, 
lapack_int lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_csytri2x( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_int nb ); lapack_int LAPACKE_csytri2x_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int nb ); lapack_int LAPACKE_csytrs2( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_csytrs2_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* work ); lapack_int LAPACKE_cunbdb( int matrix_order, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, lapack_complex_float* x11, lapack_int ldx11, lapack_complex_float* x12, lapack_int ldx12, lapack_complex_float* x21, lapack_int ldx21, lapack_complex_float* x22, lapack_int ldx22, float* theta, float* phi, lapack_complex_float* taup1, lapack_complex_float* taup2, lapack_complex_float* tauq1, lapack_complex_float* tauq2 ); lapack_int LAPACKE_cunbdb_work( int matrix_order, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, lapack_complex_float* x11, lapack_int ldx11, lapack_complex_float* x12, lapack_int ldx12, lapack_complex_float* x21, lapack_int ldx21, lapack_complex_float* x22, lapack_int ldx22, float* theta, float* phi, lapack_complex_float* taup1, lapack_complex_float* taup2, lapack_complex_float* tauq1, lapack_complex_float* tauq2, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_cuncsd( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, lapack_complex_float* x11, lapack_int ldx11, 
lapack_complex_float* x12, lapack_int ldx12, lapack_complex_float* x21, lapack_int ldx21, lapack_complex_float* x22, lapack_int ldx22, float* theta, lapack_complex_float* u1, lapack_int ldu1, lapack_complex_float* u2, lapack_int ldu2, lapack_complex_float* v1t, lapack_int ldv1t, lapack_complex_float* v2t, lapack_int ldv2t ); lapack_int LAPACKE_cuncsd_work( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, lapack_complex_float* x11, lapack_int ldx11, lapack_complex_float* x12, lapack_int ldx12, lapack_complex_float* x21, lapack_int ldx21, lapack_complex_float* x22, lapack_int ldx22, float* theta, lapack_complex_float* u1, lapack_int ldu1, lapack_complex_float* u2, lapack_int ldu2, lapack_complex_float* v1t, lapack_int ldv1t, lapack_complex_float* v2t, lapack_int ldv2t, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int lrwork, lapack_int* iwork ); lapack_int LAPACKE_dbbcsd( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, lapack_int m, lapack_int p, lapack_int q, double* theta, double* phi, double* u1, lapack_int ldu1, double* u2, lapack_int ldu2, double* v1t, lapack_int ldv1t, double* v2t, lapack_int ldv2t, double* b11d, double* b11e, double* b12d, double* b12e, double* b21d, double* b21e, double* b22d, double* b22e ); lapack_int LAPACKE_dbbcsd_work( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, lapack_int m, lapack_int p, lapack_int q, double* theta, double* phi, double* u1, lapack_int ldu1, double* u2, lapack_int ldu2, double* v1t, lapack_int ldv1t, double* v2t, lapack_int ldv2t, double* b11d, double* b11e, double* b12d, double* b12e, double* b21d, double* b21e, double* b22d, double* b22e, double* work, lapack_int lwork ); lapack_int LAPACKE_dorbdb( int matrix_order, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, double* x11, lapack_int ldx11, double* x12, lapack_int ldx12, 
double* x21, lapack_int ldx21, double* x22, lapack_int ldx22, double* theta, double* phi, double* taup1, double* taup2, double* tauq1, double* tauq2 ); lapack_int LAPACKE_dorbdb_work( int matrix_order, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, double* x11, lapack_int ldx11, double* x12, lapack_int ldx12, double* x21, lapack_int ldx21, double* x22, lapack_int ldx22, double* theta, double* phi, double* taup1, double* taup2, double* tauq1, double* tauq2, double* work, lapack_int lwork ); lapack_int LAPACKE_dorcsd( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, double* x11, lapack_int ldx11, double* x12, lapack_int ldx12, double* x21, lapack_int ldx21, double* x22, lapack_int ldx22, double* theta, double* u1, lapack_int ldu1, double* u2, lapack_int ldu2, double* v1t, lapack_int ldv1t, double* v2t, lapack_int ldv2t ); lapack_int LAPACKE_dorcsd_work( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, double* x11, lapack_int ldx11, double* x12, lapack_int ldx12, double* x21, lapack_int ldx21, double* x22, lapack_int ldx22, double* theta, double* u1, lapack_int ldu1, double* u2, lapack_int ldu2, double* v1t, lapack_int ldv1t, double* v2t, lapack_int ldv2t, double* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_dsyconv( int matrix_order, char uplo, char way, lapack_int n, double* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_dsyconv_work( int matrix_order, char uplo, char way, lapack_int n, double* a, lapack_int lda, const lapack_int* ipiv, double* work ); lapack_int LAPACKE_dsyswapr( int matrix_order, char uplo, lapack_int n, double* a, lapack_int i1, lapack_int i2 ); lapack_int LAPACKE_dsyswapr_work( int matrix_order, char uplo, lapack_int n, double* a, lapack_int i1, lapack_int i2 ); lapack_int LAPACKE_dsytri2( int matrix_order, char uplo, 
lapack_int n, double* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_dsytri2_work( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_dsytri2x( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, const lapack_int* ipiv, lapack_int nb ); lapack_int LAPACKE_dsytri2x_work( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, const lapack_int* ipiv, double* work, lapack_int nb ); lapack_int LAPACKE_dsytrs2( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_dsytrs2_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const lapack_int* ipiv, double* b, lapack_int ldb, double* work ); lapack_int LAPACKE_sbbcsd( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, lapack_int m, lapack_int p, lapack_int q, float* theta, float* phi, float* u1, lapack_int ldu1, float* u2, lapack_int ldu2, float* v1t, lapack_int ldv1t, float* v2t, lapack_int ldv2t, float* b11d, float* b11e, float* b12d, float* b12e, float* b21d, float* b21e, float* b22d, float* b22e ); lapack_int LAPACKE_sbbcsd_work( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, lapack_int m, lapack_int p, lapack_int q, float* theta, float* phi, float* u1, lapack_int ldu1, float* u2, lapack_int ldu2, float* v1t, lapack_int ldv1t, float* v2t, lapack_int ldv2t, float* b11d, float* b11e, float* b12d, float* b12e, float* b21d, float* b21e, float* b22d, float* b22e, float* work, lapack_int lwork ); lapack_int LAPACKE_sorbdb( int matrix_order, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, float* x11, lapack_int ldx11, float* x12, lapack_int ldx12, float* x21, lapack_int ldx21, float* x22, lapack_int ldx22, float* theta, float* phi, 
float* taup1, float* taup2, float* tauq1, float* tauq2 ); lapack_int LAPACKE_sorbdb_work( int matrix_order, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, float* x11, lapack_int ldx11, float* x12, lapack_int ldx12, float* x21, lapack_int ldx21, float* x22, lapack_int ldx22, float* theta, float* phi, float* taup1, float* taup2, float* tauq1, float* tauq2, float* work, lapack_int lwork ); lapack_int LAPACKE_sorcsd( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, float* x11, lapack_int ldx11, float* x12, lapack_int ldx12, float* x21, lapack_int ldx21, float* x22, lapack_int ldx22, float* theta, float* u1, lapack_int ldu1, float* u2, lapack_int ldu2, float* v1t, lapack_int ldv1t, float* v2t, lapack_int ldv2t ); lapack_int LAPACKE_sorcsd_work( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, float* x11, lapack_int ldx11, float* x12, lapack_int ldx12, float* x21, lapack_int ldx21, float* x22, lapack_int ldx22, float* theta, float* u1, lapack_int ldu1, float* u2, lapack_int ldu2, float* v1t, lapack_int ldv1t, float* v2t, lapack_int ldv2t, float* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_ssyconv( int matrix_order, char uplo, char way, lapack_int n, float* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_ssyconv_work( int matrix_order, char uplo, char way, lapack_int n, float* a, lapack_int lda, const lapack_int* ipiv, float* work ); lapack_int LAPACKE_ssyswapr( int matrix_order, char uplo, lapack_int n, float* a, lapack_int i1, lapack_int i2 ); lapack_int LAPACKE_ssyswapr_work( int matrix_order, char uplo, lapack_int n, float* a, lapack_int i1, lapack_int i2 ); lapack_int LAPACKE_ssytri2( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_ssytri2_work( int matrix_order, char uplo, 
lapack_int n, float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_ssytri2x( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, const lapack_int* ipiv, lapack_int nb ); lapack_int LAPACKE_ssytri2x_work( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, const lapack_int* ipiv, float* work, lapack_int nb ); lapack_int LAPACKE_ssytrs2( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_ssytrs2_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const lapack_int* ipiv, float* b, lapack_int ldb, float* work ); lapack_int LAPACKE_zbbcsd( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, lapack_int m, lapack_int p, lapack_int q, double* theta, double* phi, lapack_complex_double* u1, lapack_int ldu1, lapack_complex_double* u2, lapack_int ldu2, lapack_complex_double* v1t, lapack_int ldv1t, lapack_complex_double* v2t, lapack_int ldv2t, double* b11d, double* b11e, double* b12d, double* b12e, double* b21d, double* b21e, double* b22d, double* b22e ); lapack_int LAPACKE_zbbcsd_work( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, lapack_int m, lapack_int p, lapack_int q, double* theta, double* phi, lapack_complex_double* u1, lapack_int ldu1, lapack_complex_double* u2, lapack_int ldu2, lapack_complex_double* v1t, lapack_int ldv1t, lapack_complex_double* v2t, lapack_int ldv2t, double* b11d, double* b11e, double* b12d, double* b12e, double* b21d, double* b21e, double* b22d, double* b22e, double* rwork, lapack_int lrwork ); lapack_int LAPACKE_zheswapr( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int i1, lapack_int i2 ); lapack_int LAPACKE_zheswapr_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int i1, 
lapack_int i2 ); lapack_int LAPACKE_zhetri2( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_zhetri2_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_zhetri2x( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_int nb ); lapack_int LAPACKE_zhetri2x_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int nb ); lapack_int LAPACKE_zhetrs2( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_zhetrs2_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* work ); lapack_int LAPACKE_zsyconv( int matrix_order, char uplo, char way, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_zsyconv_work( int matrix_order, char uplo, char way, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* work ); lapack_int LAPACKE_zsyswapr( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int i1, lapack_int i2 ); lapack_int LAPACKE_zsyswapr_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int i1, lapack_int i2 ); lapack_int LAPACKE_zsytri2( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_zsytri2_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const 
lapack_int* ipiv, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_zsytri2x( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_int nb ); lapack_int LAPACKE_zsytri2x_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int nb ); lapack_int LAPACKE_zsytrs2( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_zsytrs2_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* work ); lapack_int LAPACKE_zunbdb( int matrix_order, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, lapack_complex_double* x11, lapack_int ldx11, lapack_complex_double* x12, lapack_int ldx12, lapack_complex_double* x21, lapack_int ldx21, lapack_complex_double* x22, lapack_int ldx22, double* theta, double* phi, lapack_complex_double* taup1, lapack_complex_double* taup2, lapack_complex_double* tauq1, lapack_complex_double* tauq2 ); lapack_int LAPACKE_zunbdb_work( int matrix_order, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, lapack_complex_double* x11, lapack_int ldx11, lapack_complex_double* x12, lapack_int ldx12, lapack_complex_double* x21, lapack_int ldx21, lapack_complex_double* x22, lapack_int ldx22, double* theta, double* phi, lapack_complex_double* taup1, lapack_complex_double* taup2, lapack_complex_double* tauq1, lapack_complex_double* tauq2, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_zuncsd( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, lapack_complex_double* x11, lapack_int 
ldx11, lapack_complex_double* x12, lapack_int ldx12, lapack_complex_double* x21, lapack_int ldx21, lapack_complex_double* x22, lapack_int ldx22, double* theta, lapack_complex_double* u1, lapack_int ldu1, lapack_complex_double* u2, lapack_int ldu2, lapack_complex_double* v1t, lapack_int ldv1t, lapack_complex_double* v2t, lapack_int ldv2t ); lapack_int LAPACKE_zuncsd_work( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, lapack_complex_double* x11, lapack_int ldx11, lapack_complex_double* x12, lapack_int ldx12, lapack_complex_double* x21, lapack_int ldx21, lapack_complex_double* x22, lapack_int ldx22, double* theta, lapack_complex_double* u1, lapack_int ldu1, lapack_complex_double* u2, lapack_int ldu2, lapack_complex_double* v1t, lapack_int ldv1t, lapack_complex_double* v2t, lapack_int ldv2t, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int lrwork, lapack_int* iwork ); //LAPACK 3.4.0 lapack_int LAPACKE_sgemqrt( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int nb, const float* v, lapack_int ldv, const float* t, lapack_int ldt, float* c, lapack_int ldc ); lapack_int LAPACKE_dgemqrt( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int nb, const double* v, lapack_int ldv, const double* t, lapack_int ldt, double* c, lapack_int ldc ); lapack_int LAPACKE_cgemqrt( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int nb, const lapack_complex_float* v, lapack_int ldv, const lapack_complex_float* t, lapack_int ldt, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zgemqrt( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int nb, const lapack_complex_double* v, lapack_int ldv, const lapack_complex_double* t, lapack_int ldt, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_sgeqrt( 
int matrix_order, lapack_int m, lapack_int n, lapack_int nb, float* a, lapack_int lda, float* t, lapack_int ldt ); lapack_int LAPACKE_dgeqrt( int matrix_order, lapack_int m, lapack_int n, lapack_int nb, double* a, lapack_int lda, double* t, lapack_int ldt ); lapack_int LAPACKE_cgeqrt( int matrix_order, lapack_int m, lapack_int n, lapack_int nb, lapack_complex_float* a, lapack_int lda, lapack_complex_float* t, lapack_int ldt ); lapack_int LAPACKE_zgeqrt( int matrix_order, lapack_int m, lapack_int n, lapack_int nb, lapack_complex_double* a, lapack_int lda, lapack_complex_double* t, lapack_int ldt ); lapack_int LAPACKE_sgeqrt2( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* t, lapack_int ldt ); lapack_int LAPACKE_dgeqrt2( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* t, lapack_int ldt ); lapack_int LAPACKE_cgeqrt2( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* t, lapack_int ldt ); lapack_int LAPACKE_zgeqrt2( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* t, lapack_int ldt ); lapack_int LAPACKE_sgeqrt3( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* t, lapack_int ldt ); lapack_int LAPACKE_dgeqrt3( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* t, lapack_int ldt ); lapack_int LAPACKE_cgeqrt3( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* t, lapack_int ldt ); lapack_int LAPACKE_zgeqrt3( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* t, lapack_int ldt ); lapack_int LAPACKE_stpmqrt( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, lapack_int nb, const float* v, lapack_int ldv, const float* t, lapack_int ldt, float* a, lapack_int lda, float* b, 
lapack_int ldb ); lapack_int LAPACKE_dtpmqrt( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, lapack_int nb, const double* v, lapack_int ldv, const double* t, lapack_int ldt, double* a, lapack_int lda, double* b, lapack_int ldb ); lapack_int LAPACKE_ctpmqrt( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, lapack_int nb, const lapack_complex_float* v, lapack_int ldv, const lapack_complex_float* t, lapack_int ldt, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_ztpmqrt( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, lapack_int nb, const lapack_complex_double* v, lapack_int ldv, const lapack_complex_double* t, lapack_int ldt, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_dtpqrt( int matrix_order, lapack_int m, lapack_int n, lapack_int l, lapack_int nb, double* a, lapack_int lda, double* b, lapack_int ldb, double* t, lapack_int ldt ); lapack_int LAPACKE_ctpqrt( int matrix_order, lapack_int m, lapack_int n, lapack_int l, lapack_int nb, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* t, lapack_int ldt ); lapack_int LAPACKE_ztpqrt( int matrix_order, lapack_int m, lapack_int n, lapack_int l, lapack_int nb, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* t, lapack_int ldt ); lapack_int LAPACKE_stpqrt2( int matrix_order, lapack_int m, lapack_int n, lapack_int l, float* a, lapack_int lda, float* b, lapack_int ldb, float* t, lapack_int ldt ); lapack_int LAPACKE_dtpqrt2( int matrix_order, lapack_int m, lapack_int n, lapack_int l, double* a, lapack_int lda, double* b, lapack_int ldb, double* t, lapack_int ldt ); lapack_int LAPACKE_ctpqrt2( int matrix_order, lapack_int m, lapack_int n, lapack_int l, 
lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* t, lapack_int ldt ); lapack_int LAPACKE_ztpqrt2( int matrix_order, lapack_int m, lapack_int n, lapack_int l, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* t, lapack_int ldt ); lapack_int LAPACKE_stprfb( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const float* v, lapack_int ldv, const float* t, lapack_int ldt, float* a, lapack_int lda, float* b, lapack_int ldb ); lapack_int LAPACKE_dtprfb( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const double* v, lapack_int ldv, const double* t, lapack_int ldt, double* a, lapack_int lda, double* b, lapack_int ldb ); lapack_int LAPACKE_ctprfb( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const lapack_complex_float* v, lapack_int ldv, const lapack_complex_float* t, lapack_int ldt, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_ztprfb( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const lapack_complex_double* v, lapack_int ldv, const lapack_complex_double* t, lapack_int ldt, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sgemqrt_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int nb, const float* v, lapack_int ldv, const float* t, lapack_int ldt, float* c, lapack_int ldc, float* work ); lapack_int LAPACKE_dgemqrt_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int nb, const double* v, lapack_int ldv, const double* t, lapack_int ldt, double* c, lapack_int 
ldc, double* work ); lapack_int LAPACKE_cgemqrt_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int nb, const lapack_complex_float* v, lapack_int ldv, const lapack_complex_float* t, lapack_int ldt, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work ); lapack_int LAPACKE_zgemqrt_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int nb, const lapack_complex_double* v, lapack_int ldv, const lapack_complex_double* t, lapack_int ldt, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work ); lapack_int LAPACKE_sgeqrt_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nb, float* a, lapack_int lda, float* t, lapack_int ldt, float* work ); lapack_int LAPACKE_dgeqrt_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nb, double* a, lapack_int lda, double* t, lapack_int ldt, double* work ); lapack_int LAPACKE_cgeqrt_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nb, lapack_complex_float* a, lapack_int lda, lapack_complex_float* t, lapack_int ldt, lapack_complex_float* work ); lapack_int LAPACKE_zgeqrt_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nb, lapack_complex_double* a, lapack_int lda, lapack_complex_double* t, lapack_int ldt, lapack_complex_double* work ); lapack_int LAPACKE_sgeqrt2_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* t, lapack_int ldt ); lapack_int LAPACKE_dgeqrt2_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* t, lapack_int ldt ); lapack_int LAPACKE_cgeqrt2_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* t, lapack_int ldt ); lapack_int LAPACKE_zgeqrt2_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* t, lapack_int ldt ); lapack_int LAPACKE_sgeqrt3_work( int matrix_order, 
lapack_int m, lapack_int n, float* a, lapack_int lda, float* t, lapack_int ldt ); lapack_int LAPACKE_dgeqrt3_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* t, lapack_int ldt ); lapack_int LAPACKE_cgeqrt3_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* t, lapack_int ldt ); lapack_int LAPACKE_zgeqrt3_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* t, lapack_int ldt ); lapack_int LAPACKE_stpmqrt_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, lapack_int nb, const float* v, lapack_int ldv, const float* t, lapack_int ldt, float* a, lapack_int lda, float* b, lapack_int ldb, float* work ); lapack_int LAPACKE_dtpmqrt_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, lapack_int nb, const double* v, lapack_int ldv, const double* t, lapack_int ldt, double* a, lapack_int lda, double* b, lapack_int ldb, double* work ); lapack_int LAPACKE_ctpmqrt_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, lapack_int nb, const lapack_complex_float* v, lapack_int ldv, const lapack_complex_float* t, lapack_int ldt, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* work ); lapack_int LAPACKE_ztpmqrt_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, lapack_int nb, const lapack_complex_double* v, lapack_int ldv, const lapack_complex_double* t, lapack_int ldt, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* work ); lapack_int LAPACKE_dtpqrt_work( int matrix_order, lapack_int m, lapack_int n, lapack_int l, lapack_int nb, double* a, lapack_int lda, double* b, lapack_int ldb, double* t, lapack_int ldt, 
double* work ); lapack_int LAPACKE_ctpqrt_work( int matrix_order, lapack_int m, lapack_int n, lapack_int l, lapack_int nb, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* t, lapack_int ldt, lapack_complex_float* work ); lapack_int LAPACKE_ztpqrt_work( int matrix_order, lapack_int m, lapack_int n, lapack_int l, lapack_int nb, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* t, lapack_int ldt, lapack_complex_double* work ); lapack_int LAPACKE_stpqrt2_work( int matrix_order, lapack_int m, lapack_int n, lapack_int l, float* a, lapack_int lda, float* b, lapack_int ldb, float* t, lapack_int ldt ); lapack_int LAPACKE_dtpqrt2_work( int matrix_order, lapack_int m, lapack_int n, lapack_int l, double* a, lapack_int lda, double* b, lapack_int ldb, double* t, lapack_int ldt ); lapack_int LAPACKE_ctpqrt2_work( int matrix_order, lapack_int m, lapack_int n, lapack_int l, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* t, lapack_int ldt ); lapack_int LAPACKE_ztpqrt2_work( int matrix_order, lapack_int m, lapack_int n, lapack_int l, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* t, lapack_int ldt ); lapack_int LAPACKE_stprfb_work( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const float* v, lapack_int ldv, const float* t, lapack_int ldt, float* a, lapack_int lda, float* b, lapack_int ldb, const float* work, lapack_int ldwork ); lapack_int LAPACKE_dtprfb_work( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const double* v, lapack_int ldv, const double* t, lapack_int ldt, double* a, lapack_int lda, double* b, lapack_int ldb, const double* work, lapack_int ldwork ); lapack_int LAPACKE_ctprfb_work( int 
matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const lapack_complex_float* v, lapack_int ldv, const lapack_complex_float* t, lapack_int ldt, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, const float* work, lapack_int ldwork ); lapack_int LAPACKE_ztprfb_work( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const lapack_complex_double* v, lapack_int ldv, const lapack_complex_double* t, lapack_int ldt, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, const double* work, lapack_int ldwork ); //LAPACK 3.X.X lapack_int LAPACKE_ssysv_rook( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dsysv_rook( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_csysv_rook( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zsysv_rook( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_csyr( int matrix_order, char uplo, lapack_int n, lapack_complex_float alpha, const lapack_complex_float* x, lapack_int incx, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zsyr( int matrix_order, char uplo, lapack_int n, lapack_complex_double alpha, const lapack_complex_double* x, lapack_int incx, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_ssysv_rook_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, lapack_int* ipiv, float* b, lapack_int ldb, float* work, 
lapack_int lwork ); lapack_int LAPACKE_dsysv_rook_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, lapack_int* ipiv, double* b, lapack_int ldb, double* work, lapack_int lwork ); lapack_int LAPACKE_csysv_rook_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zsysv_rook_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_csyr_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float alpha, const lapack_complex_float* x, lapack_int incx, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zsyr_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double alpha, const lapack_complex_double* x, lapack_int incx, lapack_complex_double* a, lapack_int lda ); void LAPACKE_ilaver( const lapack_int* vers_major, const lapack_int* vers_minor, const lapack_int* vers_patch ); #define LAPACK_sgetrf LAPACK_GLOBAL(sgetrf,SGETRF) #define LAPACK_dgetrf LAPACK_GLOBAL(dgetrf,DGETRF) #define LAPACK_cgetrf LAPACK_GLOBAL(cgetrf,CGETRF) #define LAPACK_zgetrf LAPACK_GLOBAL(zgetrf,ZGETRF) #define LAPACK_sgbtrf LAPACK_GLOBAL(sgbtrf,SGBTRF) #define LAPACK_dgbtrf LAPACK_GLOBAL(dgbtrf,DGBTRF) #define LAPACK_cgbtrf LAPACK_GLOBAL(cgbtrf,CGBTRF) #define LAPACK_zgbtrf LAPACK_GLOBAL(zgbtrf,ZGBTRF) #define LAPACK_sgttrf LAPACK_GLOBAL(sgttrf,SGTTRF) #define LAPACK_dgttrf LAPACK_GLOBAL(dgttrf,DGTTRF) #define LAPACK_cgttrf LAPACK_GLOBAL(cgttrf,CGTTRF) #define LAPACK_zgttrf LAPACK_GLOBAL(zgttrf,ZGTTRF) #define LAPACK_spotrf LAPACK_GLOBAL(spotrf,SPOTRF) #define LAPACK_dpotrf LAPACK_GLOBAL(dpotrf,DPOTRF) #define LAPACK_cpotrf LAPACK_GLOBAL(cpotrf,CPOTRF) #define LAPACK_zpotrf 
LAPACK_GLOBAL(zpotrf,ZPOTRF) #define LAPACK_dpstrf LAPACK_GLOBAL(dpstrf,DPSTRF) #define LAPACK_spstrf LAPACK_GLOBAL(spstrf,SPSTRF) #define LAPACK_zpstrf LAPACK_GLOBAL(zpstrf,ZPSTRF) #define LAPACK_cpstrf LAPACK_GLOBAL(cpstrf,CPSTRF) #define LAPACK_dpftrf LAPACK_GLOBAL(dpftrf,DPFTRF) #define LAPACK_spftrf LAPACK_GLOBAL(spftrf,SPFTRF) #define LAPACK_zpftrf LAPACK_GLOBAL(zpftrf,ZPFTRF) #define LAPACK_cpftrf LAPACK_GLOBAL(cpftrf,CPFTRF) #define LAPACK_spptrf LAPACK_GLOBAL(spptrf,SPPTRF) #define LAPACK_dpptrf LAPACK_GLOBAL(dpptrf,DPPTRF) #define LAPACK_cpptrf LAPACK_GLOBAL(cpptrf,CPPTRF) #define LAPACK_zpptrf LAPACK_GLOBAL(zpptrf,ZPPTRF) #define LAPACK_spbtrf LAPACK_GLOBAL(spbtrf,SPBTRF) #define LAPACK_dpbtrf LAPACK_GLOBAL(dpbtrf,DPBTRF) #define LAPACK_cpbtrf LAPACK_GLOBAL(cpbtrf,CPBTRF) #define LAPACK_zpbtrf LAPACK_GLOBAL(zpbtrf,ZPBTRF) #define LAPACK_spttrf LAPACK_GLOBAL(spttrf,SPTTRF) #define LAPACK_dpttrf LAPACK_GLOBAL(dpttrf,DPTTRF) #define LAPACK_cpttrf LAPACK_GLOBAL(cpttrf,CPTTRF) #define LAPACK_zpttrf LAPACK_GLOBAL(zpttrf,ZPTTRF) #define LAPACK_ssytrf LAPACK_GLOBAL(ssytrf,SSYTRF) #define LAPACK_dsytrf LAPACK_GLOBAL(dsytrf,DSYTRF) #define LAPACK_csytrf LAPACK_GLOBAL(csytrf,CSYTRF) #define LAPACK_zsytrf LAPACK_GLOBAL(zsytrf,ZSYTRF) #define LAPACK_chetrf LAPACK_GLOBAL(chetrf,CHETRF) #define LAPACK_zhetrf LAPACK_GLOBAL(zhetrf,ZHETRF) #define LAPACK_ssptrf LAPACK_GLOBAL(ssptrf,SSPTRF) #define LAPACK_dsptrf LAPACK_GLOBAL(dsptrf,DSPTRF) #define LAPACK_csptrf LAPACK_GLOBAL(csptrf,CSPTRF) #define LAPACK_zsptrf LAPACK_GLOBAL(zsptrf,ZSPTRF) #define LAPACK_chptrf LAPACK_GLOBAL(chptrf,CHPTRF) #define LAPACK_zhptrf LAPACK_GLOBAL(zhptrf,ZHPTRF) #define LAPACK_sgetrs LAPACK_GLOBAL(sgetrs,SGETRS) #define LAPACK_dgetrs LAPACK_GLOBAL(dgetrs,DGETRS) #define LAPACK_cgetrs LAPACK_GLOBAL(cgetrs,CGETRS) #define LAPACK_zgetrs LAPACK_GLOBAL(zgetrs,ZGETRS) #define LAPACK_sgbtrs LAPACK_GLOBAL(sgbtrs,SGBTRS) #define LAPACK_dgbtrs LAPACK_GLOBAL(dgbtrs,DGBTRS) #define LAPACK_cgbtrs 
LAPACK_GLOBAL(cgbtrs,CGBTRS) #define LAPACK_zgbtrs LAPACK_GLOBAL(zgbtrs,ZGBTRS) #define LAPACK_sgttrs LAPACK_GLOBAL(sgttrs,SGTTRS) #define LAPACK_dgttrs LAPACK_GLOBAL(dgttrs,DGTTRS) #define LAPACK_cgttrs LAPACK_GLOBAL(cgttrs,CGTTRS) #define LAPACK_zgttrs LAPACK_GLOBAL(zgttrs,ZGTTRS) #define LAPACK_spotrs LAPACK_GLOBAL(spotrs,SPOTRS) #define LAPACK_dpotrs LAPACK_GLOBAL(dpotrs,DPOTRS) #define LAPACK_cpotrs LAPACK_GLOBAL(cpotrs,CPOTRS) #define LAPACK_zpotrs LAPACK_GLOBAL(zpotrs,ZPOTRS) #define LAPACK_dpftrs LAPACK_GLOBAL(dpftrs,DPFTRS) #define LAPACK_spftrs LAPACK_GLOBAL(spftrs,SPFTRS) #define LAPACK_zpftrs LAPACK_GLOBAL(zpftrs,ZPFTRS) #define LAPACK_cpftrs LAPACK_GLOBAL(cpftrs,CPFTRS) #define LAPACK_spptrs LAPACK_GLOBAL(spptrs,SPPTRS) #define LAPACK_dpptrs LAPACK_GLOBAL(dpptrs,DPPTRS) #define LAPACK_cpptrs LAPACK_GLOBAL(cpptrs,CPPTRS) #define LAPACK_zpptrs LAPACK_GLOBAL(zpptrs,ZPPTRS) #define LAPACK_spbtrs LAPACK_GLOBAL(spbtrs,SPBTRS) #define LAPACK_dpbtrs LAPACK_GLOBAL(dpbtrs,DPBTRS) #define LAPACK_cpbtrs LAPACK_GLOBAL(cpbtrs,CPBTRS) #define LAPACK_zpbtrs LAPACK_GLOBAL(zpbtrs,ZPBTRS) #define LAPACK_spttrs LAPACK_GLOBAL(spttrs,SPTTRS) #define LAPACK_dpttrs LAPACK_GLOBAL(dpttrs,DPTTRS) #define LAPACK_cpttrs LAPACK_GLOBAL(cpttrs,CPTTRS) #define LAPACK_zpttrs LAPACK_GLOBAL(zpttrs,ZPTTRS) #define LAPACK_ssytrs LAPACK_GLOBAL(ssytrs,SSYTRS) #define LAPACK_dsytrs LAPACK_GLOBAL(dsytrs,DSYTRS) #define LAPACK_csytrs LAPACK_GLOBAL(csytrs,CSYTRS) #define LAPACK_zsytrs LAPACK_GLOBAL(zsytrs,ZSYTRS) #define LAPACK_chetrs LAPACK_GLOBAL(chetrs,CHETRS) #define LAPACK_zhetrs LAPACK_GLOBAL(zhetrs,ZHETRS) #define LAPACK_ssptrs LAPACK_GLOBAL(ssptrs,SSPTRS) #define LAPACK_dsptrs LAPACK_GLOBAL(dsptrs,DSPTRS) #define LAPACK_csptrs LAPACK_GLOBAL(csptrs,CSPTRS) #define LAPACK_zsptrs LAPACK_GLOBAL(zsptrs,ZSPTRS) #define LAPACK_chptrs LAPACK_GLOBAL(chptrs,CHPTRS) #define LAPACK_zhptrs LAPACK_GLOBAL(zhptrs,ZHPTRS) #define LAPACK_strtrs LAPACK_GLOBAL(strtrs,STRTRS) #define LAPACK_dtrtrs 
LAPACK_GLOBAL(dtrtrs,DTRTRS) #define LAPACK_ctrtrs LAPACK_GLOBAL(ctrtrs,CTRTRS) #define LAPACK_ztrtrs LAPACK_GLOBAL(ztrtrs,ZTRTRS) #define LAPACK_stptrs LAPACK_GLOBAL(stptrs,STPTRS) #define LAPACK_dtptrs LAPACK_GLOBAL(dtptrs,DTPTRS) #define LAPACK_ctptrs LAPACK_GLOBAL(ctptrs,CTPTRS) #define LAPACK_ztptrs LAPACK_GLOBAL(ztptrs,ZTPTRS) #define LAPACK_stbtrs LAPACK_GLOBAL(stbtrs,STBTRS) #define LAPACK_dtbtrs LAPACK_GLOBAL(dtbtrs,DTBTRS) #define LAPACK_ctbtrs LAPACK_GLOBAL(ctbtrs,CTBTRS) #define LAPACK_ztbtrs LAPACK_GLOBAL(ztbtrs,ZTBTRS) #define LAPACK_sgecon LAPACK_GLOBAL(sgecon,SGECON) #define LAPACK_dgecon LAPACK_GLOBAL(dgecon,DGECON) #define LAPACK_cgecon LAPACK_GLOBAL(cgecon,CGECON) #define LAPACK_zgecon LAPACK_GLOBAL(zgecon,ZGECON) #define LAPACK_sgbcon LAPACK_GLOBAL(sgbcon,SGBCON) #define LAPACK_dgbcon LAPACK_GLOBAL(dgbcon,DGBCON) #define LAPACK_cgbcon LAPACK_GLOBAL(cgbcon,CGBCON) #define LAPACK_zgbcon LAPACK_GLOBAL(zgbcon,ZGBCON) #define LAPACK_sgtcon LAPACK_GLOBAL(sgtcon,SGTCON) #define LAPACK_dgtcon LAPACK_GLOBAL(dgtcon,DGTCON) #define LAPACK_cgtcon LAPACK_GLOBAL(cgtcon,CGTCON) #define LAPACK_zgtcon LAPACK_GLOBAL(zgtcon,ZGTCON) #define LAPACK_spocon LAPACK_GLOBAL(spocon,SPOCON) #define LAPACK_dpocon LAPACK_GLOBAL(dpocon,DPOCON) #define LAPACK_cpocon LAPACK_GLOBAL(cpocon,CPOCON) #define LAPACK_zpocon LAPACK_GLOBAL(zpocon,ZPOCON) #define LAPACK_sppcon LAPACK_GLOBAL(sppcon,SPPCON) #define LAPACK_dppcon LAPACK_GLOBAL(dppcon,DPPCON) #define LAPACK_cppcon LAPACK_GLOBAL(cppcon,CPPCON) #define LAPACK_zppcon LAPACK_GLOBAL(zppcon,ZPPCON) #define LAPACK_spbcon LAPACK_GLOBAL(spbcon,SPBCON) #define LAPACK_dpbcon LAPACK_GLOBAL(dpbcon,DPBCON) #define LAPACK_cpbcon LAPACK_GLOBAL(cpbcon,CPBCON) #define LAPACK_zpbcon LAPACK_GLOBAL(zpbcon,ZPBCON) #define LAPACK_sptcon LAPACK_GLOBAL(sptcon,SPTCON) #define LAPACK_dptcon LAPACK_GLOBAL(dptcon,DPTCON) #define LAPACK_cptcon LAPACK_GLOBAL(cptcon,CPTCON) #define LAPACK_zptcon LAPACK_GLOBAL(zptcon,ZPTCON) #define LAPACK_ssycon 
LAPACK_GLOBAL(ssycon,SSYCON) #define LAPACK_dsycon LAPACK_GLOBAL(dsycon,DSYCON) #define LAPACK_csycon LAPACK_GLOBAL(csycon,CSYCON) #define LAPACK_zsycon LAPACK_GLOBAL(zsycon,ZSYCON) #define LAPACK_checon LAPACK_GLOBAL(checon,CHECON) #define LAPACK_zhecon LAPACK_GLOBAL(zhecon,ZHECON) #define LAPACK_sspcon LAPACK_GLOBAL(sspcon,SSPCON) #define LAPACK_dspcon LAPACK_GLOBAL(dspcon,DSPCON) #define LAPACK_cspcon LAPACK_GLOBAL(cspcon,CSPCON) #define LAPACK_zspcon LAPACK_GLOBAL(zspcon,ZSPCON) #define LAPACK_chpcon LAPACK_GLOBAL(chpcon,CHPCON) #define LAPACK_zhpcon LAPACK_GLOBAL(zhpcon,ZHPCON) #define LAPACK_strcon LAPACK_GLOBAL(strcon,STRCON) #define LAPACK_dtrcon LAPACK_GLOBAL(dtrcon,DTRCON) #define LAPACK_ctrcon LAPACK_GLOBAL(ctrcon,CTRCON) #define LAPACK_ztrcon LAPACK_GLOBAL(ztrcon,ZTRCON) #define LAPACK_stpcon LAPACK_GLOBAL(stpcon,STPCON) #define LAPACK_dtpcon LAPACK_GLOBAL(dtpcon,DTPCON) #define LAPACK_ctpcon LAPACK_GLOBAL(ctpcon,CTPCON) #define LAPACK_ztpcon LAPACK_GLOBAL(ztpcon,ZTPCON) #define LAPACK_stbcon LAPACK_GLOBAL(stbcon,STBCON) #define LAPACK_dtbcon LAPACK_GLOBAL(dtbcon,DTBCON) #define LAPACK_ctbcon LAPACK_GLOBAL(ctbcon,CTBCON) #define LAPACK_ztbcon LAPACK_GLOBAL(ztbcon,ZTBCON) #define LAPACK_sgerfs LAPACK_GLOBAL(sgerfs,SGERFS) #define LAPACK_dgerfs LAPACK_GLOBAL(dgerfs,DGERFS) #define LAPACK_cgerfs LAPACK_GLOBAL(cgerfs,CGERFS) #define LAPACK_zgerfs LAPACK_GLOBAL(zgerfs,ZGERFS) #define LAPACK_dgerfsx LAPACK_GLOBAL(dgerfsx,DGERFSX) #define LAPACK_sgerfsx LAPACK_GLOBAL(sgerfsx,SGERFSX) #define LAPACK_zgerfsx LAPACK_GLOBAL(zgerfsx,ZGERFSX) #define LAPACK_cgerfsx LAPACK_GLOBAL(cgerfsx,CGERFSX) #define LAPACK_sgbrfs LAPACK_GLOBAL(sgbrfs,SGBRFS) #define LAPACK_dgbrfs LAPACK_GLOBAL(dgbrfs,DGBRFS) #define LAPACK_cgbrfs LAPACK_GLOBAL(cgbrfs,CGBRFS) #define LAPACK_zgbrfs LAPACK_GLOBAL(zgbrfs,ZGBRFS) #define LAPACK_dgbrfsx LAPACK_GLOBAL(dgbrfsx,DGBRFSX) #define LAPACK_sgbrfsx LAPACK_GLOBAL(sgbrfsx,SGBRFSX) #define LAPACK_zgbrfsx LAPACK_GLOBAL(zgbrfsx,ZGBRFSX) #define 
LAPACK_cgbrfsx LAPACK_GLOBAL(cgbrfsx,CGBRFSX) #define LAPACK_sgtrfs LAPACK_GLOBAL(sgtrfs,SGTRFS) #define LAPACK_dgtrfs LAPACK_GLOBAL(dgtrfs,DGTRFS) #define LAPACK_cgtrfs LAPACK_GLOBAL(cgtrfs,CGTRFS) #define LAPACK_zgtrfs LAPACK_GLOBAL(zgtrfs,ZGTRFS) #define LAPACK_sporfs LAPACK_GLOBAL(sporfs,SPORFS) #define LAPACK_dporfs LAPACK_GLOBAL(dporfs,DPORFS) #define LAPACK_cporfs LAPACK_GLOBAL(cporfs,CPORFS) #define LAPACK_zporfs LAPACK_GLOBAL(zporfs,ZPORFS) #define LAPACK_dporfsx LAPACK_GLOBAL(dporfsx,DPORFSX) #define LAPACK_sporfsx LAPACK_GLOBAL(sporfsx,SPORFSX) #define LAPACK_zporfsx LAPACK_GLOBAL(zporfsx,ZPORFSX) #define LAPACK_cporfsx LAPACK_GLOBAL(cporfsx,CPORFSX) #define LAPACK_spprfs LAPACK_GLOBAL(spprfs,SPPRFS) #define LAPACK_dpprfs LAPACK_GLOBAL(dpprfs,DPPRFS) #define LAPACK_cpprfs LAPACK_GLOBAL(cpprfs,CPPRFS) #define LAPACK_zpprfs LAPACK_GLOBAL(zpprfs,ZPPRFS) #define LAPACK_spbrfs LAPACK_GLOBAL(spbrfs,SPBRFS) #define LAPACK_dpbrfs LAPACK_GLOBAL(dpbrfs,DPBRFS) #define LAPACK_cpbrfs LAPACK_GLOBAL(cpbrfs,CPBRFS) #define LAPACK_zpbrfs LAPACK_GLOBAL(zpbrfs,ZPBRFS) #define LAPACK_sptrfs LAPACK_GLOBAL(sptrfs,SPTRFS) #define LAPACK_dptrfs LAPACK_GLOBAL(dptrfs,DPTRFS) #define LAPACK_cptrfs LAPACK_GLOBAL(cptrfs,CPTRFS) #define LAPACK_zptrfs LAPACK_GLOBAL(zptrfs,ZPTRFS) #define LAPACK_ssyrfs LAPACK_GLOBAL(ssyrfs,SSYRFS) #define LAPACK_dsyrfs LAPACK_GLOBAL(dsyrfs,DSYRFS) #define LAPACK_csyrfs LAPACK_GLOBAL(csyrfs,CSYRFS) #define LAPACK_zsyrfs LAPACK_GLOBAL(zsyrfs,ZSYRFS) #define LAPACK_dsyrfsx LAPACK_GLOBAL(dsyrfsx,DSYRFSX) #define LAPACK_ssyrfsx LAPACK_GLOBAL(ssyrfsx,SSYRFSX) #define LAPACK_zsyrfsx LAPACK_GLOBAL(zsyrfsx,ZSYRFSX) #define LAPACK_csyrfsx LAPACK_GLOBAL(csyrfsx,CSYRFSX) #define LAPACK_cherfs LAPACK_GLOBAL(cherfs,CHERFS) #define LAPACK_zherfs LAPACK_GLOBAL(zherfs,ZHERFS) #define LAPACK_zherfsx LAPACK_GLOBAL(zherfsx,ZHERFSX) #define LAPACK_cherfsx LAPACK_GLOBAL(cherfsx,CHERFSX) #define LAPACK_ssprfs LAPACK_GLOBAL(ssprfs,SSPRFS) #define LAPACK_dsprfs 
LAPACK_GLOBAL(dsprfs,DSPRFS) #define LAPACK_csprfs LAPACK_GLOBAL(csprfs,CSPRFS) #define LAPACK_zsprfs LAPACK_GLOBAL(zsprfs,ZSPRFS) #define LAPACK_chprfs LAPACK_GLOBAL(chprfs,CHPRFS) #define LAPACK_zhprfs LAPACK_GLOBAL(zhprfs,ZHPRFS) #define LAPACK_strrfs LAPACK_GLOBAL(strrfs,STRRFS) #define LAPACK_dtrrfs LAPACK_GLOBAL(dtrrfs,DTRRFS) #define LAPACK_ctrrfs LAPACK_GLOBAL(ctrrfs,CTRRFS) #define LAPACK_ztrrfs LAPACK_GLOBAL(ztrrfs,ZTRRFS) #define LAPACK_stprfs LAPACK_GLOBAL(stprfs,STPRFS) #define LAPACK_dtprfs LAPACK_GLOBAL(dtprfs,DTPRFS) #define LAPACK_ctprfs LAPACK_GLOBAL(ctprfs,CTPRFS) #define LAPACK_ztprfs LAPACK_GLOBAL(ztprfs,ZTPRFS) #define LAPACK_stbrfs LAPACK_GLOBAL(stbrfs,STBRFS) #define LAPACK_dtbrfs LAPACK_GLOBAL(dtbrfs,DTBRFS) #define LAPACK_ctbrfs LAPACK_GLOBAL(ctbrfs,CTBRFS) #define LAPACK_ztbrfs LAPACK_GLOBAL(ztbrfs,ZTBRFS) #define LAPACK_sgetri LAPACK_GLOBAL(sgetri,SGETRI) #define LAPACK_dgetri LAPACK_GLOBAL(dgetri,DGETRI) #define LAPACK_cgetri LAPACK_GLOBAL(cgetri,CGETRI) #define LAPACK_zgetri LAPACK_GLOBAL(zgetri,ZGETRI) #define LAPACK_spotri LAPACK_GLOBAL(spotri,SPOTRI) #define LAPACK_dpotri LAPACK_GLOBAL(dpotri,DPOTRI) #define LAPACK_cpotri LAPACK_GLOBAL(cpotri,CPOTRI) #define LAPACK_zpotri LAPACK_GLOBAL(zpotri,ZPOTRI) #define LAPACK_dpftri LAPACK_GLOBAL(dpftri,DPFTRI) #define LAPACK_spftri LAPACK_GLOBAL(spftri,SPFTRI) #define LAPACK_zpftri LAPACK_GLOBAL(zpftri,ZPFTRI) #define LAPACK_cpftri LAPACK_GLOBAL(cpftri,CPFTRI) #define LAPACK_spptri LAPACK_GLOBAL(spptri,SPPTRI) #define LAPACK_dpptri LAPACK_GLOBAL(dpptri,DPPTRI) #define LAPACK_cpptri LAPACK_GLOBAL(cpptri,CPPTRI) #define LAPACK_zpptri LAPACK_GLOBAL(zpptri,ZPPTRI) #define LAPACK_ssytri LAPACK_GLOBAL(ssytri,SSYTRI) #define LAPACK_dsytri LAPACK_GLOBAL(dsytri,DSYTRI) #define LAPACK_csytri LAPACK_GLOBAL(csytri,CSYTRI) #define LAPACK_zsytri LAPACK_GLOBAL(zsytri,ZSYTRI) #define LAPACK_chetri LAPACK_GLOBAL(chetri,CHETRI) #define LAPACK_zhetri LAPACK_GLOBAL(zhetri,ZHETRI) #define LAPACK_ssptri 
LAPACK_GLOBAL(ssptri,SSPTRI) #define LAPACK_dsptri LAPACK_GLOBAL(dsptri,DSPTRI) #define LAPACK_csptri LAPACK_GLOBAL(csptri,CSPTRI) #define LAPACK_zsptri LAPACK_GLOBAL(zsptri,ZSPTRI) #define LAPACK_chptri LAPACK_GLOBAL(chptri,CHPTRI) #define LAPACK_zhptri LAPACK_GLOBAL(zhptri,ZHPTRI) #define LAPACK_strtri LAPACK_GLOBAL(strtri,STRTRI) #define LAPACK_dtrtri LAPACK_GLOBAL(dtrtri,DTRTRI) #define LAPACK_ctrtri LAPACK_GLOBAL(ctrtri,CTRTRI) #define LAPACK_ztrtri LAPACK_GLOBAL(ztrtri,ZTRTRI) #define LAPACK_dtftri LAPACK_GLOBAL(dtftri,DTFTRI) #define LAPACK_stftri LAPACK_GLOBAL(stftri,STFTRI) #define LAPACK_ztftri LAPACK_GLOBAL(ztftri,ZTFTRI) #define LAPACK_ctftri LAPACK_GLOBAL(ctftri,CTFTRI) #define LAPACK_stptri LAPACK_GLOBAL(stptri,STPTRI) #define LAPACK_dtptri LAPACK_GLOBAL(dtptri,DTPTRI) #define LAPACK_ctptri LAPACK_GLOBAL(ctptri,CTPTRI) #define LAPACK_ztptri LAPACK_GLOBAL(ztptri,ZTPTRI) #define LAPACK_sgeequ LAPACK_GLOBAL(sgeequ,SGEEQU) #define LAPACK_dgeequ LAPACK_GLOBAL(dgeequ,DGEEQU) #define LAPACK_cgeequ LAPACK_GLOBAL(cgeequ,CGEEQU) #define LAPACK_zgeequ LAPACK_GLOBAL(zgeequ,ZGEEQU) #define LAPACK_dgeequb LAPACK_GLOBAL(dgeequb,DGEEQUB) #define LAPACK_sgeequb LAPACK_GLOBAL(sgeequb,SGEEQUB) #define LAPACK_zgeequb LAPACK_GLOBAL(zgeequb,ZGEEQUB) #define LAPACK_cgeequb LAPACK_GLOBAL(cgeequb,CGEEQUB) #define LAPACK_sgbequ LAPACK_GLOBAL(sgbequ,SGBEQU) #define LAPACK_dgbequ LAPACK_GLOBAL(dgbequ,DGBEQU) #define LAPACK_cgbequ LAPACK_GLOBAL(cgbequ,CGBEQU) #define LAPACK_zgbequ LAPACK_GLOBAL(zgbequ,ZGBEQU) #define LAPACK_dgbequb LAPACK_GLOBAL(dgbequb,DGBEQUB) #define LAPACK_sgbequb LAPACK_GLOBAL(sgbequb,SGBEQUB) #define LAPACK_zgbequb LAPACK_GLOBAL(zgbequb,ZGBEQUB) #define LAPACK_cgbequb LAPACK_GLOBAL(cgbequb,CGBEQUB) #define LAPACK_spoequ LAPACK_GLOBAL(spoequ,SPOEQU) #define LAPACK_dpoequ LAPACK_GLOBAL(dpoequ,DPOEQU) #define LAPACK_cpoequ LAPACK_GLOBAL(cpoequ,CPOEQU) #define LAPACK_zpoequ LAPACK_GLOBAL(zpoequ,ZPOEQU) #define LAPACK_dpoequb LAPACK_GLOBAL(dpoequb,DPOEQUB) 
#define LAPACK_spoequb LAPACK_GLOBAL(spoequb,SPOEQUB) #define LAPACK_zpoequb LAPACK_GLOBAL(zpoequb,ZPOEQUB) #define LAPACK_cpoequb LAPACK_GLOBAL(cpoequb,CPOEQUB) #define LAPACK_sppequ LAPACK_GLOBAL(sppequ,SPPEQU) #define LAPACK_dppequ LAPACK_GLOBAL(dppequ,DPPEQU) #define LAPACK_cppequ LAPACK_GLOBAL(cppequ,CPPEQU) #define LAPACK_zppequ LAPACK_GLOBAL(zppequ,ZPPEQU) #define LAPACK_spbequ LAPACK_GLOBAL(spbequ,SPBEQU) #define LAPACK_dpbequ LAPACK_GLOBAL(dpbequ,DPBEQU) #define LAPACK_cpbequ LAPACK_GLOBAL(cpbequ,CPBEQU) #define LAPACK_zpbequ LAPACK_GLOBAL(zpbequ,ZPBEQU) #define LAPACK_dsyequb LAPACK_GLOBAL(dsyequb,DSYEQUB) #define LAPACK_ssyequb LAPACK_GLOBAL(ssyequb,SSYEQUB) #define LAPACK_zsyequb LAPACK_GLOBAL(zsyequb,ZSYEQUB) #define LAPACK_csyequb LAPACK_GLOBAL(csyequb,CSYEQUB) #define LAPACK_zheequb LAPACK_GLOBAL(zheequb,ZHEEQUB) #define LAPACK_cheequb LAPACK_GLOBAL(cheequb,CHEEQUB) #define LAPACK_sgesv LAPACK_GLOBAL(sgesv,SGESV) #define LAPACK_dgesv LAPACK_GLOBAL(dgesv,DGESV) #define LAPACK_cgesv LAPACK_GLOBAL(cgesv,CGESV) #define LAPACK_zgesv LAPACK_GLOBAL(zgesv,ZGESV) #define LAPACK_dsgesv LAPACK_GLOBAL(dsgesv,DSGESV) #define LAPACK_zcgesv LAPACK_GLOBAL(zcgesv,ZCGESV) #define LAPACK_sgesvx LAPACK_GLOBAL(sgesvx,SGESVX) #define LAPACK_dgesvx LAPACK_GLOBAL(dgesvx,DGESVX) #define LAPACK_cgesvx LAPACK_GLOBAL(cgesvx,CGESVX) #define LAPACK_zgesvx LAPACK_GLOBAL(zgesvx,ZGESVX) #define LAPACK_dgesvxx LAPACK_GLOBAL(dgesvxx,DGESVXX) #define LAPACK_sgesvxx LAPACK_GLOBAL(sgesvxx,SGESVXX) #define LAPACK_zgesvxx LAPACK_GLOBAL(zgesvxx,ZGESVXX) #define LAPACK_cgesvxx LAPACK_GLOBAL(cgesvxx,CGESVXX) #define LAPACK_sgbsv LAPACK_GLOBAL(sgbsv,SGBSV) #define LAPACK_dgbsv LAPACK_GLOBAL(dgbsv,DGBSV) #define LAPACK_cgbsv LAPACK_GLOBAL(cgbsv,CGBSV) #define LAPACK_zgbsv LAPACK_GLOBAL(zgbsv,ZGBSV) #define LAPACK_sgbsvx LAPACK_GLOBAL(sgbsvx,SGBSVX) #define LAPACK_dgbsvx LAPACK_GLOBAL(dgbsvx,DGBSVX) #define LAPACK_cgbsvx LAPACK_GLOBAL(cgbsvx,CGBSVX) #define LAPACK_zgbsvx 
LAPACK_GLOBAL(zgbsvx,ZGBSVX) #define LAPACK_dgbsvxx LAPACK_GLOBAL(dgbsvxx,DGBSVXX) #define LAPACK_sgbsvxx LAPACK_GLOBAL(sgbsvxx,SGBSVXX) #define LAPACK_zgbsvxx LAPACK_GLOBAL(zgbsvxx,ZGBSVXX) #define LAPACK_cgbsvxx LAPACK_GLOBAL(cgbsvxx,CGBSVXX) #define LAPACK_sgtsv LAPACK_GLOBAL(sgtsv,SGTSV) #define LAPACK_dgtsv LAPACK_GLOBAL(dgtsv,DGTSV) #define LAPACK_cgtsv LAPACK_GLOBAL(cgtsv,CGTSV) #define LAPACK_zgtsv LAPACK_GLOBAL(zgtsv,ZGTSV) #define LAPACK_sgtsvx LAPACK_GLOBAL(sgtsvx,SGTSVX) #define LAPACK_dgtsvx LAPACK_GLOBAL(dgtsvx,DGTSVX) #define LAPACK_cgtsvx LAPACK_GLOBAL(cgtsvx,CGTSVX) #define LAPACK_zgtsvx LAPACK_GLOBAL(zgtsvx,ZGTSVX) #define LAPACK_sposv LAPACK_GLOBAL(sposv,SPOSV) #define LAPACK_dposv LAPACK_GLOBAL(dposv,DPOSV) #define LAPACK_cposv LAPACK_GLOBAL(cposv,CPOSV) #define LAPACK_zposv LAPACK_GLOBAL(zposv,ZPOSV) #define LAPACK_dsposv LAPACK_GLOBAL(dsposv,DSPOSV) #define LAPACK_zcposv LAPACK_GLOBAL(zcposv,ZCPOSV) #define LAPACK_sposvx LAPACK_GLOBAL(sposvx,SPOSVX) #define LAPACK_dposvx LAPACK_GLOBAL(dposvx,DPOSVX) #define LAPACK_cposvx LAPACK_GLOBAL(cposvx,CPOSVX) #define LAPACK_zposvx LAPACK_GLOBAL(zposvx,ZPOSVX) #define LAPACK_dposvxx LAPACK_GLOBAL(dposvxx,DPOSVXX) #define LAPACK_sposvxx LAPACK_GLOBAL(sposvxx,SPOSVXX) #define LAPACK_zposvxx LAPACK_GLOBAL(zposvxx,ZPOSVXX) #define LAPACK_cposvxx LAPACK_GLOBAL(cposvxx,CPOSVXX) #define LAPACK_sppsv LAPACK_GLOBAL(sppsv,SPPSV) #define LAPACK_dppsv LAPACK_GLOBAL(dppsv,DPPSV) #define LAPACK_cppsv LAPACK_GLOBAL(cppsv,CPPSV) #define LAPACK_zppsv LAPACK_GLOBAL(zppsv,ZPPSV) #define LAPACK_sppsvx LAPACK_GLOBAL(sppsvx,SPPSVX) #define LAPACK_dppsvx LAPACK_GLOBAL(dppsvx,DPPSVX) #define LAPACK_cppsvx LAPACK_GLOBAL(cppsvx,CPPSVX) #define LAPACK_zppsvx LAPACK_GLOBAL(zppsvx,ZPPSVX) #define LAPACK_spbsv LAPACK_GLOBAL(spbsv,SPBSV) #define LAPACK_dpbsv LAPACK_GLOBAL(dpbsv,DPBSV) #define LAPACK_cpbsv LAPACK_GLOBAL(cpbsv,CPBSV) #define LAPACK_zpbsv LAPACK_GLOBAL(zpbsv,ZPBSV) #define LAPACK_spbsvx LAPACK_GLOBAL(spbsvx,SPBSVX) 
#define LAPACK_dpbsvx LAPACK_GLOBAL(dpbsvx,DPBSVX) #define LAPACK_cpbsvx LAPACK_GLOBAL(cpbsvx,CPBSVX) #define LAPACK_zpbsvx LAPACK_GLOBAL(zpbsvx,ZPBSVX) #define LAPACK_sptsv LAPACK_GLOBAL(sptsv,SPTSV) #define LAPACK_dptsv LAPACK_GLOBAL(dptsv,DPTSV) #define LAPACK_cptsv LAPACK_GLOBAL(cptsv,CPTSV) #define LAPACK_zptsv LAPACK_GLOBAL(zptsv,ZPTSV) #define LAPACK_sptsvx LAPACK_GLOBAL(sptsvx,SPTSVX) #define LAPACK_dptsvx LAPACK_GLOBAL(dptsvx,DPTSVX) #define LAPACK_cptsvx LAPACK_GLOBAL(cptsvx,CPTSVX) #define LAPACK_zptsvx LAPACK_GLOBAL(zptsvx,ZPTSVX) #define LAPACK_ssysv LAPACK_GLOBAL(ssysv,SSYSV) #define LAPACK_dsysv LAPACK_GLOBAL(dsysv,DSYSV) #define LAPACK_csysv LAPACK_GLOBAL(csysv,CSYSV) #define LAPACK_zsysv LAPACK_GLOBAL(zsysv,ZSYSV) #define LAPACK_ssysvx LAPACK_GLOBAL(ssysvx,SSYSVX) #define LAPACK_dsysvx LAPACK_GLOBAL(dsysvx,DSYSVX) #define LAPACK_csysvx LAPACK_GLOBAL(csysvx,CSYSVX) #define LAPACK_zsysvx LAPACK_GLOBAL(zsysvx,ZSYSVX) #define LAPACK_dsysvxx LAPACK_GLOBAL(dsysvxx,DSYSVXX) #define LAPACK_ssysvxx LAPACK_GLOBAL(ssysvxx,SSYSVXX) #define LAPACK_zsysvxx LAPACK_GLOBAL(zsysvxx,ZSYSVXX) #define LAPACK_csysvxx LAPACK_GLOBAL(csysvxx,CSYSVXX) #define LAPACK_chesv LAPACK_GLOBAL(chesv,CHESV) #define LAPACK_zhesv LAPACK_GLOBAL(zhesv,ZHESV) #define LAPACK_chesvx LAPACK_GLOBAL(chesvx,CHESVX) #define LAPACK_zhesvx LAPACK_GLOBAL(zhesvx,ZHESVX) #define LAPACK_zhesvxx LAPACK_GLOBAL(zhesvxx,ZHESVXX) #define LAPACK_chesvxx LAPACK_GLOBAL(chesvxx,CHESVXX) #define LAPACK_sspsv LAPACK_GLOBAL(sspsv,SSPSV) #define LAPACK_dspsv LAPACK_GLOBAL(dspsv,DSPSV) #define LAPACK_cspsv LAPACK_GLOBAL(cspsv,CSPSV) #define LAPACK_zspsv LAPACK_GLOBAL(zspsv,ZSPSV) #define LAPACK_sspsvx LAPACK_GLOBAL(sspsvx,SSPSVX) #define LAPACK_dspsvx LAPACK_GLOBAL(dspsvx,DSPSVX) #define LAPACK_cspsvx LAPACK_GLOBAL(cspsvx,CSPSVX) #define LAPACK_zspsvx LAPACK_GLOBAL(zspsvx,ZSPSVX) #define LAPACK_chpsv LAPACK_GLOBAL(chpsv,CHPSV) #define LAPACK_zhpsv LAPACK_GLOBAL(zhpsv,ZHPSV) #define LAPACK_chpsvx 
LAPACK_GLOBAL(chpsvx,CHPSVX) #define LAPACK_zhpsvx LAPACK_GLOBAL(zhpsvx,ZHPSVX) #define LAPACK_sgeqrf LAPACK_GLOBAL(sgeqrf,SGEQRF) #define LAPACK_dgeqrf LAPACK_GLOBAL(dgeqrf,DGEQRF) #define LAPACK_cgeqrf LAPACK_GLOBAL(cgeqrf,CGEQRF) #define LAPACK_zgeqrf LAPACK_GLOBAL(zgeqrf,ZGEQRF) #define LAPACK_sgeqpf LAPACK_GLOBAL(sgeqpf,SGEQPF) #define LAPACK_dgeqpf LAPACK_GLOBAL(dgeqpf,DGEQPF) #define LAPACK_cgeqpf LAPACK_GLOBAL(cgeqpf,CGEQPF) #define LAPACK_zgeqpf LAPACK_GLOBAL(zgeqpf,ZGEQPF) #define LAPACK_sgeqp3 LAPACK_GLOBAL(sgeqp3,SGEQP3) #define LAPACK_dgeqp3 LAPACK_GLOBAL(dgeqp3,DGEQP3) #define LAPACK_cgeqp3 LAPACK_GLOBAL(cgeqp3,CGEQP3) #define LAPACK_zgeqp3 LAPACK_GLOBAL(zgeqp3,ZGEQP3) #define LAPACK_sorgqr LAPACK_GLOBAL(sorgqr,SORGQR) #define LAPACK_dorgqr LAPACK_GLOBAL(dorgqr,DORGQR) #define LAPACK_sormqr LAPACK_GLOBAL(sormqr,SORMQR) #define LAPACK_dormqr LAPACK_GLOBAL(dormqr,DORMQR) #define LAPACK_cungqr LAPACK_GLOBAL(cungqr,CUNGQR) #define LAPACK_zungqr LAPACK_GLOBAL(zungqr,ZUNGQR) #define LAPACK_cunmqr LAPACK_GLOBAL(cunmqr,CUNMQR) #define LAPACK_zunmqr LAPACK_GLOBAL(zunmqr,ZUNMQR) #define LAPACK_sgelqf LAPACK_GLOBAL(sgelqf,SGELQF) #define LAPACK_dgelqf LAPACK_GLOBAL(dgelqf,DGELQF) #define LAPACK_cgelqf LAPACK_GLOBAL(cgelqf,CGELQF) #define LAPACK_zgelqf LAPACK_GLOBAL(zgelqf,ZGELQF) #define LAPACK_sorglq LAPACK_GLOBAL(sorglq,SORGLQ) #define LAPACK_dorglq LAPACK_GLOBAL(dorglq,DORGLQ) #define LAPACK_sormlq LAPACK_GLOBAL(sormlq,SORMLQ) #define LAPACK_dormlq LAPACK_GLOBAL(dormlq,DORMLQ) #define LAPACK_cunglq LAPACK_GLOBAL(cunglq,CUNGLQ) #define LAPACK_zunglq LAPACK_GLOBAL(zunglq,ZUNGLQ) #define LAPACK_cunmlq LAPACK_GLOBAL(cunmlq,CUNMLQ) #define LAPACK_zunmlq LAPACK_GLOBAL(zunmlq,ZUNMLQ) #define LAPACK_sgeqlf LAPACK_GLOBAL(sgeqlf,SGEQLF) #define LAPACK_dgeqlf LAPACK_GLOBAL(dgeqlf,DGEQLF) #define LAPACK_cgeqlf LAPACK_GLOBAL(cgeqlf,CGEQLF) #define LAPACK_zgeqlf LAPACK_GLOBAL(zgeqlf,ZGEQLF) #define LAPACK_sorgql LAPACK_GLOBAL(sorgql,SORGQL) #define LAPACK_dorgql 
LAPACK_GLOBAL(dorgql,DORGQL) #define LAPACK_cungql LAPACK_GLOBAL(cungql,CUNGQL) #define LAPACK_zungql LAPACK_GLOBAL(zungql,ZUNGQL) #define LAPACK_sormql LAPACK_GLOBAL(sormql,SORMQL) #define LAPACK_dormql LAPACK_GLOBAL(dormql,DORMQL) #define LAPACK_cunmql LAPACK_GLOBAL(cunmql,CUNMQL) #define LAPACK_zunmql LAPACK_GLOBAL(zunmql,ZUNMQL) #define LAPACK_sgerqf LAPACK_GLOBAL(sgerqf,SGERQF) #define LAPACK_dgerqf LAPACK_GLOBAL(dgerqf,DGERQF) #define LAPACK_cgerqf LAPACK_GLOBAL(cgerqf,CGERQF) #define LAPACK_zgerqf LAPACK_GLOBAL(zgerqf,ZGERQF) #define LAPACK_sorgrq LAPACK_GLOBAL(sorgrq,SORGRQ) #define LAPACK_dorgrq LAPACK_GLOBAL(dorgrq,DORGRQ) #define LAPACK_cungrq LAPACK_GLOBAL(cungrq,CUNGRQ) #define LAPACK_zungrq LAPACK_GLOBAL(zungrq,ZUNGRQ) #define LAPACK_sormrq LAPACK_GLOBAL(sormrq,SORMRQ) #define LAPACK_dormrq LAPACK_GLOBAL(dormrq,DORMRQ) #define LAPACK_cunmrq LAPACK_GLOBAL(cunmrq,CUNMRQ) #define LAPACK_zunmrq LAPACK_GLOBAL(zunmrq,ZUNMRQ) #define LAPACK_stzrzf LAPACK_GLOBAL(stzrzf,STZRZF) #define LAPACK_dtzrzf LAPACK_GLOBAL(dtzrzf,DTZRZF) #define LAPACK_ctzrzf LAPACK_GLOBAL(ctzrzf,CTZRZF) #define LAPACK_ztzrzf LAPACK_GLOBAL(ztzrzf,ZTZRZF) #define LAPACK_sormrz LAPACK_GLOBAL(sormrz,SORMRZ) #define LAPACK_dormrz LAPACK_GLOBAL(dormrz,DORMRZ) #define LAPACK_cunmrz LAPACK_GLOBAL(cunmrz,CUNMRZ) #define LAPACK_zunmrz LAPACK_GLOBAL(zunmrz,ZUNMRZ) #define LAPACK_sggqrf LAPACK_GLOBAL(sggqrf,SGGQRF) #define LAPACK_dggqrf LAPACK_GLOBAL(dggqrf,DGGQRF) #define LAPACK_cggqrf LAPACK_GLOBAL(cggqrf,CGGQRF) #define LAPACK_zggqrf LAPACK_GLOBAL(zggqrf,ZGGQRF) #define LAPACK_sggrqf LAPACK_GLOBAL(sggrqf,SGGRQF) #define LAPACK_dggrqf LAPACK_GLOBAL(dggrqf,DGGRQF) #define LAPACK_cggrqf LAPACK_GLOBAL(cggrqf,CGGRQF) #define LAPACK_zggrqf LAPACK_GLOBAL(zggrqf,ZGGRQF) #define LAPACK_sgebrd LAPACK_GLOBAL(sgebrd,SGEBRD) #define LAPACK_dgebrd LAPACK_GLOBAL(dgebrd,DGEBRD) #define LAPACK_cgebrd LAPACK_GLOBAL(cgebrd,CGEBRD) #define LAPACK_zgebrd LAPACK_GLOBAL(zgebrd,ZGEBRD) #define LAPACK_sgbbrd 
LAPACK_GLOBAL(sgbbrd,SGBBRD) #define LAPACK_dgbbrd LAPACK_GLOBAL(dgbbrd,DGBBRD) #define LAPACK_cgbbrd LAPACK_GLOBAL(cgbbrd,CGBBRD) #define LAPACK_zgbbrd LAPACK_GLOBAL(zgbbrd,ZGBBRD) #define LAPACK_sorgbr LAPACK_GLOBAL(sorgbr,SORGBR) #define LAPACK_dorgbr LAPACK_GLOBAL(dorgbr,DORGBR) #define LAPACK_sormbr LAPACK_GLOBAL(sormbr,SORMBR) #define LAPACK_dormbr LAPACK_GLOBAL(dormbr,DORMBR) #define LAPACK_cungbr LAPACK_GLOBAL(cungbr,CUNGBR) #define LAPACK_zungbr LAPACK_GLOBAL(zungbr,ZUNGBR) #define LAPACK_cunmbr LAPACK_GLOBAL(cunmbr,CUNMBR) #define LAPACK_zunmbr LAPACK_GLOBAL(zunmbr,ZUNMBR) #define LAPACK_sbdsqr LAPACK_GLOBAL(sbdsqr,SBDSQR) #define LAPACK_dbdsqr LAPACK_GLOBAL(dbdsqr,DBDSQR) #define LAPACK_cbdsqr LAPACK_GLOBAL(cbdsqr,CBDSQR) #define LAPACK_zbdsqr LAPACK_GLOBAL(zbdsqr,ZBDSQR) #define LAPACK_sbdsdc LAPACK_GLOBAL(sbdsdc,SBDSDC) #define LAPACK_dbdsdc LAPACK_GLOBAL(dbdsdc,DBDSDC) #define LAPACK_ssytrd LAPACK_GLOBAL(ssytrd,SSYTRD) #define LAPACK_dsytrd LAPACK_GLOBAL(dsytrd,DSYTRD) #define LAPACK_sorgtr LAPACK_GLOBAL(sorgtr,SORGTR) #define LAPACK_dorgtr LAPACK_GLOBAL(dorgtr,DORGTR) #define LAPACK_sormtr LAPACK_GLOBAL(sormtr,SORMTR) #define LAPACK_dormtr LAPACK_GLOBAL(dormtr,DORMTR) #define LAPACK_chetrd LAPACK_GLOBAL(chetrd,CHETRD) #define LAPACK_zhetrd LAPACK_GLOBAL(zhetrd,ZHETRD) #define LAPACK_cungtr LAPACK_GLOBAL(cungtr,CUNGTR) #define LAPACK_zungtr LAPACK_GLOBAL(zungtr,ZUNGTR) #define LAPACK_cunmtr LAPACK_GLOBAL(cunmtr,CUNMTR) #define LAPACK_zunmtr LAPACK_GLOBAL(zunmtr,ZUNMTR) #define LAPACK_ssptrd LAPACK_GLOBAL(ssptrd,SSPTRD) #define LAPACK_dsptrd LAPACK_GLOBAL(dsptrd,DSPTRD) #define LAPACK_sopgtr LAPACK_GLOBAL(sopgtr,SOPGTR) #define LAPACK_dopgtr LAPACK_GLOBAL(dopgtr,DOPGTR) #define LAPACK_sopmtr LAPACK_GLOBAL(sopmtr,SOPMTR) #define LAPACK_dopmtr LAPACK_GLOBAL(dopmtr,DOPMTR) #define LAPACK_chptrd LAPACK_GLOBAL(chptrd,CHPTRD) #define LAPACK_zhptrd LAPACK_GLOBAL(zhptrd,ZHPTRD) #define LAPACK_cupgtr LAPACK_GLOBAL(cupgtr,CUPGTR) #define LAPACK_zupgtr 
LAPACK_GLOBAL(zupgtr,ZUPGTR) #define LAPACK_cupmtr LAPACK_GLOBAL(cupmtr,CUPMTR) #define LAPACK_zupmtr LAPACK_GLOBAL(zupmtr,ZUPMTR) #define LAPACK_ssbtrd LAPACK_GLOBAL(ssbtrd,SSBTRD) #define LAPACK_dsbtrd LAPACK_GLOBAL(dsbtrd,DSBTRD) #define LAPACK_chbtrd LAPACK_GLOBAL(chbtrd,CHBTRD) #define LAPACK_zhbtrd LAPACK_GLOBAL(zhbtrd,ZHBTRD) #define LAPACK_ssterf LAPACK_GLOBAL(ssterf,SSTERF) #define LAPACK_dsterf LAPACK_GLOBAL(dsterf,DSTERF) #define LAPACK_ssteqr LAPACK_GLOBAL(ssteqr,SSTEQR) #define LAPACK_dsteqr LAPACK_GLOBAL(dsteqr,DSTEQR) #define LAPACK_csteqr LAPACK_GLOBAL(csteqr,CSTEQR) #define LAPACK_zsteqr LAPACK_GLOBAL(zsteqr,ZSTEQR) #define LAPACK_sstemr LAPACK_GLOBAL(sstemr,SSTEMR) #define LAPACK_dstemr LAPACK_GLOBAL(dstemr,DSTEMR) #define LAPACK_cstemr LAPACK_GLOBAL(cstemr,CSTEMR) #define LAPACK_zstemr LAPACK_GLOBAL(zstemr,ZSTEMR) #define LAPACK_sstedc LAPACK_GLOBAL(sstedc,SSTEDC) #define LAPACK_dstedc LAPACK_GLOBAL(dstedc,DSTEDC) #define LAPACK_cstedc LAPACK_GLOBAL(cstedc,CSTEDC) #define LAPACK_zstedc LAPACK_GLOBAL(zstedc,ZSTEDC) #define LAPACK_sstegr LAPACK_GLOBAL(sstegr,SSTEGR) #define LAPACK_dstegr LAPACK_GLOBAL(dstegr,DSTEGR) #define LAPACK_cstegr LAPACK_GLOBAL(cstegr,CSTEGR) #define LAPACK_zstegr LAPACK_GLOBAL(zstegr,ZSTEGR) #define LAPACK_spteqr LAPACK_GLOBAL(spteqr,SPTEQR) #define LAPACK_dpteqr LAPACK_GLOBAL(dpteqr,DPTEQR) #define LAPACK_cpteqr LAPACK_GLOBAL(cpteqr,CPTEQR) #define LAPACK_zpteqr LAPACK_GLOBAL(zpteqr,ZPTEQR) #define LAPACK_sstebz LAPACK_GLOBAL(sstebz,SSTEBZ) #define LAPACK_dstebz LAPACK_GLOBAL(dstebz,DSTEBZ) #define LAPACK_sstein LAPACK_GLOBAL(sstein,SSTEIN) #define LAPACK_dstein LAPACK_GLOBAL(dstein,DSTEIN) #define LAPACK_cstein LAPACK_GLOBAL(cstein,CSTEIN) #define LAPACK_zstein LAPACK_GLOBAL(zstein,ZSTEIN) #define LAPACK_sdisna LAPACK_GLOBAL(sdisna,SDISNA) #define LAPACK_ddisna LAPACK_GLOBAL(ddisna,DDISNA) #define LAPACK_ssygst LAPACK_GLOBAL(ssygst,SSYGST) #define LAPACK_dsygst LAPACK_GLOBAL(dsygst,DSYGST) #define LAPACK_chegst 
LAPACK_GLOBAL(chegst,CHEGST) #define LAPACK_zhegst LAPACK_GLOBAL(zhegst,ZHEGST) #define LAPACK_sspgst LAPACK_GLOBAL(sspgst,SSPGST) #define LAPACK_dspgst LAPACK_GLOBAL(dspgst,DSPGST) #define LAPACK_chpgst LAPACK_GLOBAL(chpgst,CHPGST) #define LAPACK_zhpgst LAPACK_GLOBAL(zhpgst,ZHPGST) #define LAPACK_ssbgst LAPACK_GLOBAL(ssbgst,SSBGST) #define LAPACK_dsbgst LAPACK_GLOBAL(dsbgst,DSBGST) #define LAPACK_chbgst LAPACK_GLOBAL(chbgst,CHBGST) #define LAPACK_zhbgst LAPACK_GLOBAL(zhbgst,ZHBGST) #define LAPACK_spbstf LAPACK_GLOBAL(spbstf,SPBSTF) #define LAPACK_dpbstf LAPACK_GLOBAL(dpbstf,DPBSTF) #define LAPACK_cpbstf LAPACK_GLOBAL(cpbstf,CPBSTF) #define LAPACK_zpbstf LAPACK_GLOBAL(zpbstf,ZPBSTF) #define LAPACK_sgehrd LAPACK_GLOBAL(sgehrd,SGEHRD) #define LAPACK_dgehrd LAPACK_GLOBAL(dgehrd,DGEHRD) #define LAPACK_cgehrd LAPACK_GLOBAL(cgehrd,CGEHRD) #define LAPACK_zgehrd LAPACK_GLOBAL(zgehrd,ZGEHRD) #define LAPACK_sorghr LAPACK_GLOBAL(sorghr,SORGHR) #define LAPACK_dorghr LAPACK_GLOBAL(dorghr,DORGHR) #define LAPACK_sormhr LAPACK_GLOBAL(sormhr,SORMHR) #define LAPACK_dormhr LAPACK_GLOBAL(dormhr,DORMHR) #define LAPACK_cunghr LAPACK_GLOBAL(cunghr,CUNGHR) #define LAPACK_zunghr LAPACK_GLOBAL(zunghr,ZUNGHR) #define LAPACK_cunmhr LAPACK_GLOBAL(cunmhr,CUNMHR) #define LAPACK_zunmhr LAPACK_GLOBAL(zunmhr,ZUNMHR) #define LAPACK_sgebal LAPACK_GLOBAL(sgebal,SGEBAL) #define LAPACK_dgebal LAPACK_GLOBAL(dgebal,DGEBAL) #define LAPACK_cgebal LAPACK_GLOBAL(cgebal,CGEBAL) #define LAPACK_zgebal LAPACK_GLOBAL(zgebal,ZGEBAL) #define LAPACK_sgebak LAPACK_GLOBAL(sgebak,SGEBAK) #define LAPACK_dgebak LAPACK_GLOBAL(dgebak,DGEBAK) #define LAPACK_cgebak LAPACK_GLOBAL(cgebak,CGEBAK) #define LAPACK_zgebak LAPACK_GLOBAL(zgebak,ZGEBAK) #define LAPACK_shseqr LAPACK_GLOBAL(shseqr,SHSEQR) #define LAPACK_dhseqr LAPACK_GLOBAL(dhseqr,DHSEQR) #define LAPACK_chseqr LAPACK_GLOBAL(chseqr,CHSEQR) #define LAPACK_zhseqr LAPACK_GLOBAL(zhseqr,ZHSEQR) #define LAPACK_shsein LAPACK_GLOBAL(shsein,SHSEIN) #define LAPACK_dhsein 
LAPACK_GLOBAL(dhsein,DHSEIN) #define LAPACK_chsein LAPACK_GLOBAL(chsein,CHSEIN) #define LAPACK_zhsein LAPACK_GLOBAL(zhsein,ZHSEIN) #define LAPACK_strevc LAPACK_GLOBAL(strevc,STREVC) #define LAPACK_dtrevc LAPACK_GLOBAL(dtrevc,DTREVC) #define LAPACK_ctrevc LAPACK_GLOBAL(ctrevc,CTREVC) #define LAPACK_ztrevc LAPACK_GLOBAL(ztrevc,ZTREVC) #define LAPACK_strsna LAPACK_GLOBAL(strsna,STRSNA) #define LAPACK_dtrsna LAPACK_GLOBAL(dtrsna,DTRSNA) #define LAPACK_ctrsna LAPACK_GLOBAL(ctrsna,CTRSNA) #define LAPACK_ztrsna LAPACK_GLOBAL(ztrsna,ZTRSNA) #define LAPACK_strexc LAPACK_GLOBAL(strexc,STREXC) #define LAPACK_dtrexc LAPACK_GLOBAL(dtrexc,DTREXC) #define LAPACK_ctrexc LAPACK_GLOBAL(ctrexc,CTREXC) #define LAPACK_ztrexc LAPACK_GLOBAL(ztrexc,ZTREXC) #define LAPACK_strsen LAPACK_GLOBAL(strsen,STRSEN) #define LAPACK_dtrsen LAPACK_GLOBAL(dtrsen,DTRSEN) #define LAPACK_ctrsen LAPACK_GLOBAL(ctrsen,CTRSEN) #define LAPACK_ztrsen LAPACK_GLOBAL(ztrsen,ZTRSEN) #define LAPACK_strsyl LAPACK_GLOBAL(strsyl,STRSYL) #define LAPACK_dtrsyl LAPACK_GLOBAL(dtrsyl,DTRSYL) #define LAPACK_ctrsyl LAPACK_GLOBAL(ctrsyl,CTRSYL) #define LAPACK_ztrsyl LAPACK_GLOBAL(ztrsyl,ZTRSYL) #define LAPACK_sgghrd LAPACK_GLOBAL(sgghrd,SGGHRD) #define LAPACK_dgghrd LAPACK_GLOBAL(dgghrd,DGGHRD) #define LAPACK_cgghrd LAPACK_GLOBAL(cgghrd,CGGHRD) #define LAPACK_zgghrd LAPACK_GLOBAL(zgghrd,ZGGHRD) #define LAPACK_sggbal LAPACK_GLOBAL(sggbal,SGGBAL) #define LAPACK_dggbal LAPACK_GLOBAL(dggbal,DGGBAL) #define LAPACK_cggbal LAPACK_GLOBAL(cggbal,CGGBAL) #define LAPACK_zggbal LAPACK_GLOBAL(zggbal,ZGGBAL) #define LAPACK_sggbak LAPACK_GLOBAL(sggbak,SGGBAK) #define LAPACK_dggbak LAPACK_GLOBAL(dggbak,DGGBAK) #define LAPACK_cggbak LAPACK_GLOBAL(cggbak,CGGBAK) #define LAPACK_zggbak LAPACK_GLOBAL(zggbak,ZGGBAK) #define LAPACK_shgeqz LAPACK_GLOBAL(shgeqz,SHGEQZ) #define LAPACK_dhgeqz LAPACK_GLOBAL(dhgeqz,DHGEQZ) #define LAPACK_chgeqz LAPACK_GLOBAL(chgeqz,CHGEQZ) #define LAPACK_zhgeqz LAPACK_GLOBAL(zhgeqz,ZHGEQZ) #define LAPACK_stgevc 
LAPACK_GLOBAL(stgevc,STGEVC) #define LAPACK_dtgevc LAPACK_GLOBAL(dtgevc,DTGEVC) #define LAPACK_ctgevc LAPACK_GLOBAL(ctgevc,CTGEVC) #define LAPACK_ztgevc LAPACK_GLOBAL(ztgevc,ZTGEVC) #define LAPACK_stgexc LAPACK_GLOBAL(stgexc,STGEXC) #define LAPACK_dtgexc LAPACK_GLOBAL(dtgexc,DTGEXC) #define LAPACK_ctgexc LAPACK_GLOBAL(ctgexc,CTGEXC) #define LAPACK_ztgexc LAPACK_GLOBAL(ztgexc,ZTGEXC) #define LAPACK_stgsen LAPACK_GLOBAL(stgsen,STGSEN) #define LAPACK_dtgsen LAPACK_GLOBAL(dtgsen,DTGSEN) #define LAPACK_ctgsen LAPACK_GLOBAL(ctgsen,CTGSEN) #define LAPACK_ztgsen LAPACK_GLOBAL(ztgsen,ZTGSEN) #define LAPACK_stgsyl LAPACK_GLOBAL(stgsyl,STGSYL) #define LAPACK_dtgsyl LAPACK_GLOBAL(dtgsyl,DTGSYL) #define LAPACK_ctgsyl LAPACK_GLOBAL(ctgsyl,CTGSYL) #define LAPACK_ztgsyl LAPACK_GLOBAL(ztgsyl,ZTGSYL) #define LAPACK_stgsna LAPACK_GLOBAL(stgsna,STGSNA) #define LAPACK_dtgsna LAPACK_GLOBAL(dtgsna,DTGSNA) #define LAPACK_ctgsna LAPACK_GLOBAL(ctgsna,CTGSNA) #define LAPACK_ztgsna LAPACK_GLOBAL(ztgsna,ZTGSNA) #define LAPACK_sggsvp LAPACK_GLOBAL(sggsvp,SGGSVP) #define LAPACK_dggsvp LAPACK_GLOBAL(dggsvp,DGGSVP) #define LAPACK_cggsvp LAPACK_GLOBAL(cggsvp,CGGSVP) #define LAPACK_zggsvp LAPACK_GLOBAL(zggsvp,ZGGSVP) #define LAPACK_stgsja LAPACK_GLOBAL(stgsja,STGSJA) #define LAPACK_dtgsja LAPACK_GLOBAL(dtgsja,DTGSJA) #define LAPACK_ctgsja LAPACK_GLOBAL(ctgsja,CTGSJA) #define LAPACK_ztgsja LAPACK_GLOBAL(ztgsja,ZTGSJA) #define LAPACK_sgels LAPACK_GLOBAL(sgels,SGELS) #define LAPACK_dgels LAPACK_GLOBAL(dgels,DGELS) #define LAPACK_cgels LAPACK_GLOBAL(cgels,CGELS) #define LAPACK_zgels LAPACK_GLOBAL(zgels,ZGELS) #define LAPACK_sgelsy LAPACK_GLOBAL(sgelsy,SGELSY) #define LAPACK_dgelsy LAPACK_GLOBAL(dgelsy,DGELSY) #define LAPACK_cgelsy LAPACK_GLOBAL(cgelsy,CGELSY) #define LAPACK_zgelsy LAPACK_GLOBAL(zgelsy,ZGELSY) #define LAPACK_sgelss LAPACK_GLOBAL(sgelss,SGELSS) #define LAPACK_dgelss LAPACK_GLOBAL(dgelss,DGELSS) #define LAPACK_cgelss LAPACK_GLOBAL(cgelss,CGELSS) #define LAPACK_zgelss 
LAPACK_GLOBAL(zgelss,ZGELSS) #define LAPACK_sgelsd LAPACK_GLOBAL(sgelsd,SGELSD) #define LAPACK_dgelsd LAPACK_GLOBAL(dgelsd,DGELSD) #define LAPACK_cgelsd LAPACK_GLOBAL(cgelsd,CGELSD) #define LAPACK_zgelsd LAPACK_GLOBAL(zgelsd,ZGELSD) #define LAPACK_sgglse LAPACK_GLOBAL(sgglse,SGGLSE) #define LAPACK_dgglse LAPACK_GLOBAL(dgglse,DGGLSE) #define LAPACK_cgglse LAPACK_GLOBAL(cgglse,CGGLSE) #define LAPACK_zgglse LAPACK_GLOBAL(zgglse,ZGGLSE) #define LAPACK_sggglm LAPACK_GLOBAL(sggglm,SGGGLM) #define LAPACK_dggglm LAPACK_GLOBAL(dggglm,DGGGLM) #define LAPACK_cggglm LAPACK_GLOBAL(cggglm,CGGGLM) #define LAPACK_zggglm LAPACK_GLOBAL(zggglm,ZGGGLM) #define LAPACK_ssyev LAPACK_GLOBAL(ssyev,SSYEV) #define LAPACK_dsyev LAPACK_GLOBAL(dsyev,DSYEV) #define LAPACK_cheev LAPACK_GLOBAL(cheev,CHEEV) #define LAPACK_zheev LAPACK_GLOBAL(zheev,ZHEEV) #define LAPACK_ssyevd LAPACK_GLOBAL(ssyevd,SSYEVD) #define LAPACK_dsyevd LAPACK_GLOBAL(dsyevd,DSYEVD) #define LAPACK_cheevd LAPACK_GLOBAL(cheevd,CHEEVD) #define LAPACK_zheevd LAPACK_GLOBAL(zheevd,ZHEEVD) #define LAPACK_ssyevx LAPACK_GLOBAL(ssyevx,SSYEVX) #define LAPACK_dsyevx LAPACK_GLOBAL(dsyevx,DSYEVX) #define LAPACK_cheevx LAPACK_GLOBAL(cheevx,CHEEVX) #define LAPACK_zheevx LAPACK_GLOBAL(zheevx,ZHEEVX) #define LAPACK_ssyevr LAPACK_GLOBAL(ssyevr,SSYEVR) #define LAPACK_dsyevr LAPACK_GLOBAL(dsyevr,DSYEVR) #define LAPACK_cheevr LAPACK_GLOBAL(cheevr,CHEEVR) #define LAPACK_zheevr LAPACK_GLOBAL(zheevr,ZHEEVR) #define LAPACK_sspev LAPACK_GLOBAL(sspev,SSPEV) #define LAPACK_dspev LAPACK_GLOBAL(dspev,DSPEV) #define LAPACK_chpev LAPACK_GLOBAL(chpev,CHPEV) #define LAPACK_zhpev LAPACK_GLOBAL(zhpev,ZHPEV) #define LAPACK_sspevd LAPACK_GLOBAL(sspevd,SSPEVD) #define LAPACK_dspevd LAPACK_GLOBAL(dspevd,DSPEVD) #define LAPACK_chpevd LAPACK_GLOBAL(chpevd,CHPEVD) #define LAPACK_zhpevd LAPACK_GLOBAL(zhpevd,ZHPEVD) #define LAPACK_sspevx LAPACK_GLOBAL(sspevx,SSPEVX) #define LAPACK_dspevx LAPACK_GLOBAL(dspevx,DSPEVX) #define LAPACK_chpevx LAPACK_GLOBAL(chpevx,CHPEVX) 
#define LAPACK_zhpevx LAPACK_GLOBAL(zhpevx,ZHPEVX) #define LAPACK_ssbev LAPACK_GLOBAL(ssbev,SSBEV) #define LAPACK_dsbev LAPACK_GLOBAL(dsbev,DSBEV) #define LAPACK_chbev LAPACK_GLOBAL(chbev,CHBEV) #define LAPACK_zhbev LAPACK_GLOBAL(zhbev,ZHBEV) #define LAPACK_ssbevd LAPACK_GLOBAL(ssbevd,SSBEVD) #define LAPACK_dsbevd LAPACK_GLOBAL(dsbevd,DSBEVD) #define LAPACK_chbevd LAPACK_GLOBAL(chbevd,CHBEVD) #define LAPACK_zhbevd LAPACK_GLOBAL(zhbevd,ZHBEVD) #define LAPACK_ssbevx LAPACK_GLOBAL(ssbevx,SSBEVX) #define LAPACK_dsbevx LAPACK_GLOBAL(dsbevx,DSBEVX) #define LAPACK_chbevx LAPACK_GLOBAL(chbevx,CHBEVX) #define LAPACK_zhbevx LAPACK_GLOBAL(zhbevx,ZHBEVX) #define LAPACK_sstev LAPACK_GLOBAL(sstev,SSTEV) #define LAPACK_dstev LAPACK_GLOBAL(dstev,DSTEV) #define LAPACK_sstevd LAPACK_GLOBAL(sstevd,SSTEVD) #define LAPACK_dstevd LAPACK_GLOBAL(dstevd,DSTEVD) #define LAPACK_sstevx LAPACK_GLOBAL(sstevx,SSTEVX) #define LAPACK_dstevx LAPACK_GLOBAL(dstevx,DSTEVX) #define LAPACK_sstevr LAPACK_GLOBAL(sstevr,SSTEVR) #define LAPACK_dstevr LAPACK_GLOBAL(dstevr,DSTEVR) #define LAPACK_sgees LAPACK_GLOBAL(sgees,SGEES) #define LAPACK_dgees LAPACK_GLOBAL(dgees,DGEES) #define LAPACK_cgees LAPACK_GLOBAL(cgees,CGEES) #define LAPACK_zgees LAPACK_GLOBAL(zgees,ZGEES) #define LAPACK_sgeesx LAPACK_GLOBAL(sgeesx,SGEESX) #define LAPACK_dgeesx LAPACK_GLOBAL(dgeesx,DGEESX) #define LAPACK_cgeesx LAPACK_GLOBAL(cgeesx,CGEESX) #define LAPACK_zgeesx LAPACK_GLOBAL(zgeesx,ZGEESX) #define LAPACK_sgeev LAPACK_GLOBAL(sgeev,SGEEV) #define LAPACK_dgeev LAPACK_GLOBAL(dgeev,DGEEV) #define LAPACK_cgeev LAPACK_GLOBAL(cgeev,CGEEV) #define LAPACK_zgeev LAPACK_GLOBAL(zgeev,ZGEEV) #define LAPACK_sgeevx LAPACK_GLOBAL(sgeevx,SGEEVX) #define LAPACK_dgeevx LAPACK_GLOBAL(dgeevx,DGEEVX) #define LAPACK_cgeevx LAPACK_GLOBAL(cgeevx,CGEEVX) #define LAPACK_zgeevx LAPACK_GLOBAL(zgeevx,ZGEEVX) #define LAPACK_sgesvd LAPACK_GLOBAL(sgesvd,SGESVD) #define LAPACK_dgesvd LAPACK_GLOBAL(dgesvd,DGESVD) #define LAPACK_cgesvd LAPACK_GLOBAL(cgesvd,CGESVD) 
/* Remainder of the LAPACK_GLOBAL name-mangling aliases, followed by the first
 * group of raw Fortran prototypes.
 *
 * Macro sections (in order): divide-and-conquer / Jacobi SVD (gesdd, gejsv,
 * gesvj), generalized SVD (ggsvd), generalized symmetric/Hermitian
 * eigenproblems (sygv/hegv/spgv/hpgv/sbgv/hbgv families), generalized Schur
 * and eigenproblems (gges/ggev families), RFP-format BLAS-like routines
 * (sfrk/hfrk/tfsm) and packed/full/RFP storage conversions (tfttp, tfttr,
 * tpttf, tpttr, trttf, trttp), auxiliary routines (lacgv, larnv, geqr2, lacn2,
 * lacpy, laswp, norms lange/lanhe/lansy/lantr, lamch, Householder helpers
 * larfb/larfg/larft/larfx, test-matrix generators latms/lagge/lagsy/laghe,
 * precision converters lag2d/lag2s/lag2z/lag2c, lauum, laset, lasrt, lapmr,
 * lapy2/lapy3, lartgp/lartgs), then the LAPACK 3.3.0 block (CS decomposition
 * bbcsd/orbdb/orcsd/unbdb/uncsd, syconv/syswapr/sytri2/sytrs2 and Hermitian
 * analogues), the LAPACK 3.4.0 block (compact-WY QR: gemqrt, geqrt, geqrt2,
 * geqrt3, tpmqrt, tpqrt, tpqrt2, tprfb), and rook-pivoting drivers
 * (sysv_rook) plus csyr/zsyr and ilaver.
 * NOTE(review): LAPACK_stpqrt appears to be missing from the tpqrt family
 * (d/c/z variants and stpqrt2 are present) — verify against upstream
 * lapacke.h before relying on the single-precision tpqrt binding.
 *
 * Prototypes: Fortran calling convention — every argument is passed by
 * pointer, INFO is an output status; these declare the LU (getrf/gbtrf/gttrf),
 * Cholesky (potrf/pstrf/pftrf/pptrf/pbtrf/pttrf) and symmetric-indefinite
 * (sytrf/hetrf/sptrf/hptrf) factorizations, and the corresponding factored
 * solves (getrs/gbtrs/gttrs, potrs/pftrs/pptrs/pbtrs/pttrs, sytrs/hetrs/
 * sptrs/hptrs) plus triangular solves (trtrs/tptrs/tbtrs) up to spttrs. */
#define LAPACK_zgesvd LAPACK_GLOBAL(zgesvd,ZGESVD) #define LAPACK_sgesdd LAPACK_GLOBAL(sgesdd,SGESDD) #define LAPACK_dgesdd LAPACK_GLOBAL(dgesdd,DGESDD) #define LAPACK_cgesdd LAPACK_GLOBAL(cgesdd,CGESDD) #define LAPACK_zgesdd LAPACK_GLOBAL(zgesdd,ZGESDD) #define LAPACK_dgejsv LAPACK_GLOBAL(dgejsv,DGEJSV) #define LAPACK_sgejsv LAPACK_GLOBAL(sgejsv,SGEJSV) #define LAPACK_dgesvj LAPACK_GLOBAL(dgesvj,DGESVJ) #define LAPACK_sgesvj LAPACK_GLOBAL(sgesvj,SGESVJ) #define LAPACK_sggsvd LAPACK_GLOBAL(sggsvd,SGGSVD) #define LAPACK_dggsvd LAPACK_GLOBAL(dggsvd,DGGSVD) #define LAPACK_cggsvd LAPACK_GLOBAL(cggsvd,CGGSVD) #define LAPACK_zggsvd LAPACK_GLOBAL(zggsvd,ZGGSVD) #define LAPACK_ssygv LAPACK_GLOBAL(ssygv,SSYGV) #define LAPACK_dsygv LAPACK_GLOBAL(dsygv,DSYGV) #define LAPACK_chegv LAPACK_GLOBAL(chegv,CHEGV) #define LAPACK_zhegv LAPACK_GLOBAL(zhegv,ZHEGV) #define LAPACK_ssygvd LAPACK_GLOBAL(ssygvd,SSYGVD) #define LAPACK_dsygvd LAPACK_GLOBAL(dsygvd,DSYGVD) #define LAPACK_chegvd LAPACK_GLOBAL(chegvd,CHEGVD) #define LAPACK_zhegvd LAPACK_GLOBAL(zhegvd,ZHEGVD) #define LAPACK_ssygvx LAPACK_GLOBAL(ssygvx,SSYGVX) #define LAPACK_dsygvx LAPACK_GLOBAL(dsygvx,DSYGVX) #define LAPACK_chegvx LAPACK_GLOBAL(chegvx,CHEGVX) #define LAPACK_zhegvx LAPACK_GLOBAL(zhegvx,ZHEGVX) #define LAPACK_sspgv LAPACK_GLOBAL(sspgv,SSPGV) #define LAPACK_dspgv LAPACK_GLOBAL(dspgv,DSPGV) #define LAPACK_chpgv LAPACK_GLOBAL(chpgv,CHPGV) #define LAPACK_zhpgv LAPACK_GLOBAL(zhpgv,ZHPGV) #define LAPACK_sspgvd LAPACK_GLOBAL(sspgvd,SSPGVD) #define LAPACK_dspgvd LAPACK_GLOBAL(dspgvd,DSPGVD) #define LAPACK_chpgvd LAPACK_GLOBAL(chpgvd,CHPGVD) #define LAPACK_zhpgvd LAPACK_GLOBAL(zhpgvd,ZHPGVD) #define LAPACK_sspgvx LAPACK_GLOBAL(sspgvx,SSPGVX) #define LAPACK_dspgvx LAPACK_GLOBAL(dspgvx,DSPGVX) #define LAPACK_chpgvx LAPACK_GLOBAL(chpgvx,CHPGVX) #define LAPACK_zhpgvx LAPACK_GLOBAL(zhpgvx,ZHPGVX) #define LAPACK_ssbgv LAPACK_GLOBAL(ssbgv,SSBGV) #define LAPACK_dsbgv LAPACK_GLOBAL(dsbgv,DSBGV) #define LAPACK_chbgv
LAPACK_GLOBAL(chbgv,CHBGV) #define LAPACK_zhbgv LAPACK_GLOBAL(zhbgv,ZHBGV) #define LAPACK_ssbgvd LAPACK_GLOBAL(ssbgvd,SSBGVD) #define LAPACK_dsbgvd LAPACK_GLOBAL(dsbgvd,DSBGVD) #define LAPACK_chbgvd LAPACK_GLOBAL(chbgvd,CHBGVD) #define LAPACK_zhbgvd LAPACK_GLOBAL(zhbgvd,ZHBGVD) #define LAPACK_ssbgvx LAPACK_GLOBAL(ssbgvx,SSBGVX) #define LAPACK_dsbgvx LAPACK_GLOBAL(dsbgvx,DSBGVX) #define LAPACK_chbgvx LAPACK_GLOBAL(chbgvx,CHBGVX) #define LAPACK_zhbgvx LAPACK_GLOBAL(zhbgvx,ZHBGVX) #define LAPACK_sgges LAPACK_GLOBAL(sgges,SGGES) #define LAPACK_dgges LAPACK_GLOBAL(dgges,DGGES) #define LAPACK_cgges LAPACK_GLOBAL(cgges,CGGES) #define LAPACK_zgges LAPACK_GLOBAL(zgges,ZGGES) #define LAPACK_sggesx LAPACK_GLOBAL(sggesx,SGGESX) #define LAPACK_dggesx LAPACK_GLOBAL(dggesx,DGGESX) #define LAPACK_cggesx LAPACK_GLOBAL(cggesx,CGGESX) #define LAPACK_zggesx LAPACK_GLOBAL(zggesx,ZGGESX) #define LAPACK_sggev LAPACK_GLOBAL(sggev,SGGEV) #define LAPACK_dggev LAPACK_GLOBAL(dggev,DGGEV) #define LAPACK_cggev LAPACK_GLOBAL(cggev,CGGEV) #define LAPACK_zggev LAPACK_GLOBAL(zggev,ZGGEV) #define LAPACK_sggevx LAPACK_GLOBAL(sggevx,SGGEVX) #define LAPACK_dggevx LAPACK_GLOBAL(dggevx,DGGEVX) #define LAPACK_cggevx LAPACK_GLOBAL(cggevx,CGGEVX) #define LAPACK_zggevx LAPACK_GLOBAL(zggevx,ZGGEVX) #define LAPACK_dsfrk LAPACK_GLOBAL(dsfrk,DSFRK) #define LAPACK_ssfrk LAPACK_GLOBAL(ssfrk,SSFRK) #define LAPACK_zhfrk LAPACK_GLOBAL(zhfrk,ZHFRK) #define LAPACK_chfrk LAPACK_GLOBAL(chfrk,CHFRK) #define LAPACK_dtfsm LAPACK_GLOBAL(dtfsm,DTFSM) #define LAPACK_stfsm LAPACK_GLOBAL(stfsm,STFSM) #define LAPACK_ztfsm LAPACK_GLOBAL(ztfsm,ZTFSM) #define LAPACK_ctfsm LAPACK_GLOBAL(ctfsm,CTFSM) #define LAPACK_dtfttp LAPACK_GLOBAL(dtfttp,DTFTTP) #define LAPACK_stfttp LAPACK_GLOBAL(stfttp,STFTTP) #define LAPACK_ztfttp LAPACK_GLOBAL(ztfttp,ZTFTTP) #define LAPACK_ctfttp LAPACK_GLOBAL(ctfttp,CTFTTP) #define LAPACK_dtfttr LAPACK_GLOBAL(dtfttr,DTFTTR) #define LAPACK_stfttr LAPACK_GLOBAL(stfttr,STFTTR) #define LAPACK_ztfttr
LAPACK_GLOBAL(ztfttr,ZTFTTR) #define LAPACK_ctfttr LAPACK_GLOBAL(ctfttr,CTFTTR) #define LAPACK_dtpttf LAPACK_GLOBAL(dtpttf,DTPTTF) #define LAPACK_stpttf LAPACK_GLOBAL(stpttf,STPTTF) #define LAPACK_ztpttf LAPACK_GLOBAL(ztpttf,ZTPTTF) #define LAPACK_ctpttf LAPACK_GLOBAL(ctpttf,CTPTTF) #define LAPACK_dtpttr LAPACK_GLOBAL(dtpttr,DTPTTR) #define LAPACK_stpttr LAPACK_GLOBAL(stpttr,STPTTR) #define LAPACK_ztpttr LAPACK_GLOBAL(ztpttr,ZTPTTR) #define LAPACK_ctpttr LAPACK_GLOBAL(ctpttr,CTPTTR) #define LAPACK_dtrttf LAPACK_GLOBAL(dtrttf,DTRTTF) #define LAPACK_strttf LAPACK_GLOBAL(strttf,STRTTF) #define LAPACK_ztrttf LAPACK_GLOBAL(ztrttf,ZTRTTF) #define LAPACK_ctrttf LAPACK_GLOBAL(ctrttf,CTRTTF) #define LAPACK_dtrttp LAPACK_GLOBAL(dtrttp,DTRTTP) #define LAPACK_strttp LAPACK_GLOBAL(strttp,STRTTP) #define LAPACK_ztrttp LAPACK_GLOBAL(ztrttp,ZTRTTP) #define LAPACK_ctrttp LAPACK_GLOBAL(ctrttp,CTRTTP) #define LAPACK_sgeqrfp LAPACK_GLOBAL(sgeqrfp,SGEQRFP) #define LAPACK_dgeqrfp LAPACK_GLOBAL(dgeqrfp,DGEQRFP) #define LAPACK_cgeqrfp LAPACK_GLOBAL(cgeqrfp,CGEQRFP) #define LAPACK_zgeqrfp LAPACK_GLOBAL(zgeqrfp,ZGEQRFP) #define LAPACK_clacgv LAPACK_GLOBAL(clacgv,CLACGV) #define LAPACK_zlacgv LAPACK_GLOBAL(zlacgv,ZLACGV) #define LAPACK_slarnv LAPACK_GLOBAL(slarnv,SLARNV) #define LAPACK_dlarnv LAPACK_GLOBAL(dlarnv,DLARNV) #define LAPACK_clarnv LAPACK_GLOBAL(clarnv,CLARNV) #define LAPACK_zlarnv LAPACK_GLOBAL(zlarnv,ZLARNV) #define LAPACK_sgeqr2 LAPACK_GLOBAL(sgeqr2,SGEQR2) #define LAPACK_dgeqr2 LAPACK_GLOBAL(dgeqr2,DGEQR2) #define LAPACK_cgeqr2 LAPACK_GLOBAL(cgeqr2,CGEQR2) #define LAPACK_zgeqr2 LAPACK_GLOBAL(zgeqr2,ZGEQR2) #define LAPACK_slacn2 LAPACK_GLOBAL(slacn2,SLACN2) #define LAPACK_dlacn2 LAPACK_GLOBAL(dlacn2,DLACN2) #define LAPACK_clacn2 LAPACK_GLOBAL(clacn2,CLACN2) #define LAPACK_zlacn2 LAPACK_GLOBAL(zlacn2,ZLACN2) #define LAPACK_slacpy LAPACK_GLOBAL(slacpy,SLACPY) #define LAPACK_dlacpy LAPACK_GLOBAL(dlacpy,DLACPY) #define LAPACK_clacpy LAPACK_GLOBAL(clacpy,CLACPY) #define
LAPACK_zlacpy LAPACK_GLOBAL(zlacpy,ZLACPY) #define LAPACK_clacp2 LAPACK_GLOBAL(clacp2,CLACP2) #define LAPACK_zlacp2 LAPACK_GLOBAL(zlacp2,ZLACP2) #define LAPACK_sgetf2 LAPACK_GLOBAL(sgetf2,SGETF2) #define LAPACK_dgetf2 LAPACK_GLOBAL(dgetf2,DGETF2) #define LAPACK_cgetf2 LAPACK_GLOBAL(cgetf2,CGETF2) #define LAPACK_zgetf2 LAPACK_GLOBAL(zgetf2,ZGETF2) #define LAPACK_slaswp LAPACK_GLOBAL(slaswp,SLASWP) #define LAPACK_dlaswp LAPACK_GLOBAL(dlaswp,DLASWP) #define LAPACK_claswp LAPACK_GLOBAL(claswp,CLASWP) #define LAPACK_zlaswp LAPACK_GLOBAL(zlaswp,ZLASWP) #define LAPACK_slange LAPACK_GLOBAL(slange,SLANGE) #define LAPACK_dlange LAPACK_GLOBAL(dlange,DLANGE) #define LAPACK_clange LAPACK_GLOBAL(clange,CLANGE) #define LAPACK_zlange LAPACK_GLOBAL(zlange,ZLANGE) #define LAPACK_clanhe LAPACK_GLOBAL(clanhe,CLANHE) #define LAPACK_zlanhe LAPACK_GLOBAL(zlanhe,ZLANHE) #define LAPACK_slansy LAPACK_GLOBAL(slansy,SLANSY) #define LAPACK_dlansy LAPACK_GLOBAL(dlansy,DLANSY) #define LAPACK_clansy LAPACK_GLOBAL(clansy,CLANSY) #define LAPACK_zlansy LAPACK_GLOBAL(zlansy,ZLANSY) #define LAPACK_slantr LAPACK_GLOBAL(slantr,SLANTR) #define LAPACK_dlantr LAPACK_GLOBAL(dlantr,DLANTR) #define LAPACK_clantr LAPACK_GLOBAL(clantr,CLANTR) #define LAPACK_zlantr LAPACK_GLOBAL(zlantr,ZLANTR) #define LAPACK_slamch LAPACK_GLOBAL(slamch,SLAMCH) #define LAPACK_dlamch LAPACK_GLOBAL(dlamch,DLAMCH) #define LAPACK_sgelq2 LAPACK_GLOBAL(sgelq2,SGELQ2) #define LAPACK_dgelq2 LAPACK_GLOBAL(dgelq2,DGELQ2) #define LAPACK_cgelq2 LAPACK_GLOBAL(cgelq2,CGELQ2) #define LAPACK_zgelq2 LAPACK_GLOBAL(zgelq2,ZGELQ2) #define LAPACK_slarfb LAPACK_GLOBAL(slarfb,SLARFB) #define LAPACK_dlarfb LAPACK_GLOBAL(dlarfb,DLARFB) #define LAPACK_clarfb LAPACK_GLOBAL(clarfb,CLARFB) #define LAPACK_zlarfb LAPACK_GLOBAL(zlarfb,ZLARFB) #define LAPACK_slarfg LAPACK_GLOBAL(slarfg,SLARFG) #define LAPACK_dlarfg LAPACK_GLOBAL(dlarfg,DLARFG) #define LAPACK_clarfg LAPACK_GLOBAL(clarfg,CLARFG) #define LAPACK_zlarfg LAPACK_GLOBAL(zlarfg,ZLARFG) #define
LAPACK_slarft LAPACK_GLOBAL(slarft,SLARFT) #define LAPACK_dlarft LAPACK_GLOBAL(dlarft,DLARFT) #define LAPACK_clarft LAPACK_GLOBAL(clarft,CLARFT) #define LAPACK_zlarft LAPACK_GLOBAL(zlarft,ZLARFT) #define LAPACK_slarfx LAPACK_GLOBAL(slarfx,SLARFX) #define LAPACK_dlarfx LAPACK_GLOBAL(dlarfx,DLARFX) #define LAPACK_clarfx LAPACK_GLOBAL(clarfx,CLARFX) #define LAPACK_zlarfx LAPACK_GLOBAL(zlarfx,ZLARFX) #define LAPACK_slatms LAPACK_GLOBAL(slatms,SLATMS) #define LAPACK_dlatms LAPACK_GLOBAL(dlatms,DLATMS) #define LAPACK_clatms LAPACK_GLOBAL(clatms,CLATMS) #define LAPACK_zlatms LAPACK_GLOBAL(zlatms,ZLATMS) #define LAPACK_slag2d LAPACK_GLOBAL(slag2d,SLAG2D) #define LAPACK_dlag2s LAPACK_GLOBAL(dlag2s,DLAG2S) #define LAPACK_clag2z LAPACK_GLOBAL(clag2z,CLAG2Z) #define LAPACK_zlag2c LAPACK_GLOBAL(zlag2c,ZLAG2C) #define LAPACK_slauum LAPACK_GLOBAL(slauum,SLAUUM) #define LAPACK_dlauum LAPACK_GLOBAL(dlauum,DLAUUM) #define LAPACK_clauum LAPACK_GLOBAL(clauum,CLAUUM) #define LAPACK_zlauum LAPACK_GLOBAL(zlauum,ZLAUUM) #define LAPACK_slagge LAPACK_GLOBAL(slagge,SLAGGE) #define LAPACK_dlagge LAPACK_GLOBAL(dlagge,DLAGGE) #define LAPACK_clagge LAPACK_GLOBAL(clagge,CLAGGE) #define LAPACK_zlagge LAPACK_GLOBAL(zlagge,ZLAGGE) #define LAPACK_slaset LAPACK_GLOBAL(slaset,SLASET) #define LAPACK_dlaset LAPACK_GLOBAL(dlaset,DLASET) #define LAPACK_claset LAPACK_GLOBAL(claset,CLASET) #define LAPACK_zlaset LAPACK_GLOBAL(zlaset,ZLASET) #define LAPACK_slasrt LAPACK_GLOBAL(slasrt,SLASRT) #define LAPACK_dlasrt LAPACK_GLOBAL(dlasrt,DLASRT) #define LAPACK_slagsy LAPACK_GLOBAL(slagsy,SLAGSY) #define LAPACK_dlagsy LAPACK_GLOBAL(dlagsy,DLAGSY) #define LAPACK_clagsy LAPACK_GLOBAL(clagsy,CLAGSY) #define LAPACK_zlagsy LAPACK_GLOBAL(zlagsy,ZLAGSY) #define LAPACK_claghe LAPACK_GLOBAL(claghe,CLAGHE) #define LAPACK_zlaghe LAPACK_GLOBAL(zlaghe,ZLAGHE) #define LAPACK_slapmr LAPACK_GLOBAL(slapmr,SLAPMR) #define LAPACK_dlapmr LAPACK_GLOBAL(dlapmr,DLAPMR) #define LAPACK_clapmr LAPACK_GLOBAL(clapmr,CLAPMR) #define
LAPACK_zlapmr LAPACK_GLOBAL(zlapmr,ZLAPMR) #define LAPACK_slapy2 LAPACK_GLOBAL(slapy2,SLAPY2) #define LAPACK_dlapy2 LAPACK_GLOBAL(dlapy2,DLAPY2) #define LAPACK_slapy3 LAPACK_GLOBAL(slapy3,SLAPY3) #define LAPACK_dlapy3 LAPACK_GLOBAL(dlapy3,DLAPY3) #define LAPACK_slartgp LAPACK_GLOBAL(slartgp,SLARTGP) #define LAPACK_dlartgp LAPACK_GLOBAL(dlartgp,DLARTGP) #define LAPACK_slartgs LAPACK_GLOBAL(slartgs,SLARTGS) #define LAPACK_dlartgs LAPACK_GLOBAL(dlartgs,DLARTGS) // LAPACK 3.3.0 #define LAPACK_cbbcsd LAPACK_GLOBAL(cbbcsd,CBBCSD) #define LAPACK_cheswapr LAPACK_GLOBAL(cheswapr,CHESWAPR) #define LAPACK_chetri2 LAPACK_GLOBAL(chetri2,CHETRI2) #define LAPACK_chetri2x LAPACK_GLOBAL(chetri2x,CHETRI2X) #define LAPACK_chetrs2 LAPACK_GLOBAL(chetrs2,CHETRS2) #define LAPACK_csyconv LAPACK_GLOBAL(csyconv,CSYCONV) #define LAPACK_csyswapr LAPACK_GLOBAL(csyswapr,CSYSWAPR) #define LAPACK_csytri2 LAPACK_GLOBAL(csytri2,CSYTRI2) #define LAPACK_csytri2x LAPACK_GLOBAL(csytri2x,CSYTRI2X) #define LAPACK_csytrs2 LAPACK_GLOBAL(csytrs2,CSYTRS2) #define LAPACK_cunbdb LAPACK_GLOBAL(cunbdb,CUNBDB) #define LAPACK_cuncsd LAPACK_GLOBAL(cuncsd,CUNCSD) #define LAPACK_dbbcsd LAPACK_GLOBAL(dbbcsd,DBBCSD) #define LAPACK_dorbdb LAPACK_GLOBAL(dorbdb,DORBDB) #define LAPACK_dorcsd LAPACK_GLOBAL(dorcsd,DORCSD) #define LAPACK_dsyconv LAPACK_GLOBAL(dsyconv,DSYCONV) #define LAPACK_dsyswapr LAPACK_GLOBAL(dsyswapr,DSYSWAPR) #define LAPACK_dsytri2 LAPACK_GLOBAL(dsytri2,DSYTRI2) #define LAPACK_dsytri2x LAPACK_GLOBAL(dsytri2x,DSYTRI2X) #define LAPACK_dsytrs2 LAPACK_GLOBAL(dsytrs2,DSYTRS2) #define LAPACK_sbbcsd LAPACK_GLOBAL(sbbcsd,SBBCSD) #define LAPACK_sorbdb LAPACK_GLOBAL(sorbdb,SORBDB) #define LAPACK_sorcsd LAPACK_GLOBAL(sorcsd,SORCSD) #define LAPACK_ssyconv LAPACK_GLOBAL(ssyconv,SSYCONV) #define LAPACK_ssyswapr LAPACK_GLOBAL(ssyswapr,SSYSWAPR) #define LAPACK_ssytri2 LAPACK_GLOBAL(ssytri2,SSYTRI2) #define LAPACK_ssytri2x LAPACK_GLOBAL(ssytri2x,SSYTRI2X) #define LAPACK_ssytrs2 LAPACK_GLOBAL(ssytrs2,SSYTRS2) #define
LAPACK_zbbcsd LAPACK_GLOBAL(zbbcsd,ZBBCSD) #define LAPACK_zheswapr LAPACK_GLOBAL(zheswapr,ZHESWAPR) #define LAPACK_zhetri2 LAPACK_GLOBAL(zhetri2,ZHETRI2) #define LAPACK_zhetri2x LAPACK_GLOBAL(zhetri2x,ZHETRI2X) #define LAPACK_zhetrs2 LAPACK_GLOBAL(zhetrs2,ZHETRS2) #define LAPACK_zsyconv LAPACK_GLOBAL(zsyconv,ZSYCONV) #define LAPACK_zsyswapr LAPACK_GLOBAL(zsyswapr,ZSYSWAPR) #define LAPACK_zsytri2 LAPACK_GLOBAL(zsytri2,ZSYTRI2) #define LAPACK_zsytri2x LAPACK_GLOBAL(zsytri2x,ZSYTRI2X) #define LAPACK_zsytrs2 LAPACK_GLOBAL(zsytrs2,ZSYTRS2) #define LAPACK_zunbdb LAPACK_GLOBAL(zunbdb,ZUNBDB) #define LAPACK_zuncsd LAPACK_GLOBAL(zuncsd,ZUNCSD) // LAPACK 3.4.0 #define LAPACK_sgemqrt LAPACK_GLOBAL(sgemqrt,SGEMQRT) #define LAPACK_dgemqrt LAPACK_GLOBAL(dgemqrt,DGEMQRT) #define LAPACK_cgemqrt LAPACK_GLOBAL(cgemqrt,CGEMQRT) #define LAPACK_zgemqrt LAPACK_GLOBAL(zgemqrt,ZGEMQRT) #define LAPACK_sgeqrt LAPACK_GLOBAL(sgeqrt,SGEQRT) #define LAPACK_dgeqrt LAPACK_GLOBAL(dgeqrt,DGEQRT) #define LAPACK_cgeqrt LAPACK_GLOBAL(cgeqrt,CGEQRT) #define LAPACK_zgeqrt LAPACK_GLOBAL(zgeqrt,ZGEQRT) #define LAPACK_sgeqrt2 LAPACK_GLOBAL(sgeqrt2,SGEQRT2) #define LAPACK_dgeqrt2 LAPACK_GLOBAL(dgeqrt2,DGEQRT2) #define LAPACK_cgeqrt2 LAPACK_GLOBAL(cgeqrt2,CGEQRT2) #define LAPACK_zgeqrt2 LAPACK_GLOBAL(zgeqrt2,ZGEQRT2) #define LAPACK_sgeqrt3 LAPACK_GLOBAL(sgeqrt3,SGEQRT3) #define LAPACK_dgeqrt3 LAPACK_GLOBAL(dgeqrt3,DGEQRT3) #define LAPACK_cgeqrt3 LAPACK_GLOBAL(cgeqrt3,CGEQRT3) #define LAPACK_zgeqrt3 LAPACK_GLOBAL(zgeqrt3,ZGEQRT3) #define LAPACK_stpmqrt LAPACK_GLOBAL(stpmqrt,STPMQRT) #define LAPACK_dtpmqrt LAPACK_GLOBAL(dtpmqrt,DTPMQRT) #define LAPACK_ctpmqrt LAPACK_GLOBAL(ctpmqrt,CTPMQRT) #define LAPACK_ztpmqrt LAPACK_GLOBAL(ztpmqrt,ZTPMQRT) #define LAPACK_dtpqrt LAPACK_GLOBAL(dtpqrt,DTPQRT) #define LAPACK_ctpqrt LAPACK_GLOBAL(ctpqrt,CTPQRT) #define LAPACK_ztpqrt LAPACK_GLOBAL(ztpqrt,ZTPQRT) #define LAPACK_stpqrt2 LAPACK_GLOBAL(stpqrt2,STPQRT2) #define LAPACK_dtpqrt2 LAPACK_GLOBAL(dtpqrt2,DTPQRT2) #define
LAPACK_ctpqrt2 LAPACK_GLOBAL(ctpqrt2,CTPQRT2) #define LAPACK_ztpqrt2 LAPACK_GLOBAL(ztpqrt2,ZTPQRT2) #define LAPACK_stprfb LAPACK_GLOBAL(stprfb,STPRFB) #define LAPACK_dtprfb LAPACK_GLOBAL(dtprfb,DTPRFB) #define LAPACK_ctprfb LAPACK_GLOBAL(ctprfb,CTPRFB) #define LAPACK_ztprfb LAPACK_GLOBAL(ztprfb,ZTPRFB) // LAPACK 3.X.X #define LAPACK_ssysv_rook LAPACK_GLOBAL(ssysv_rook,SSYSV_ROOK) #define LAPACK_dsysv_rook LAPACK_GLOBAL(dsysv_rook,DSYSV_ROOK) #define LAPACK_csysv_rook LAPACK_GLOBAL(csysv_rook,CSYSV_ROOK) #define LAPACK_zsysv_rook LAPACK_GLOBAL(zsysv_rook,ZSYSV_ROOK) #define LAPACK_csyr LAPACK_GLOBAL(csyr,CSYR) #define LAPACK_zsyr LAPACK_GLOBAL(zsyr,ZSYR) #define LAPACK_ilaver LAPACK_GLOBAL(ilaver,ILAVER) void LAPACK_sgetrf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, lapack_int* ipiv, lapack_int *info ); void LAPACK_dgetrf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, lapack_int* ipiv, lapack_int *info ); void LAPACK_cgetrf( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int* ipiv, lapack_int *info ); void LAPACK_zgetrf( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv, lapack_int *info ); void LAPACK_sgbtrf( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, float* ab, lapack_int* ldab, lapack_int* ipiv, lapack_int *info ); void LAPACK_dgbtrf( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, double* ab, lapack_int* ldab, lapack_int* ipiv, lapack_int *info ); void LAPACK_cgbtrf( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_complex_float* ab, lapack_int* ldab, lapack_int* ipiv, lapack_int *info ); void LAPACK_zgbtrf( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_complex_double* ab, lapack_int* ldab, lapack_int* ipiv, lapack_int *info ); void LAPACK_sgttrf( lapack_int* n, float* dl, float* d, float* du, float* du2, lapack_int* ipiv, lapack_int *info ); void LAPACK_dgttrf( lapack_int* n, double* dl,
double* d, double* du, double* du2, lapack_int* ipiv, lapack_int *info ); void LAPACK_cgttrf( lapack_int* n, lapack_complex_float* dl, lapack_complex_float* d, lapack_complex_float* du, lapack_complex_float* du2, lapack_int* ipiv, lapack_int *info ); void LAPACK_zgttrf( lapack_int* n, lapack_complex_double* dl, lapack_complex_double* d, lapack_complex_double* du, lapack_complex_double* du2, lapack_int* ipiv, lapack_int *info ); void LAPACK_spotrf( char* uplo, lapack_int* n, float* a, lapack_int* lda, lapack_int *info ); void LAPACK_dpotrf( char* uplo, lapack_int* n, double* a, lapack_int* lda, lapack_int *info ); void LAPACK_cpotrf( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int *info ); void LAPACK_zpotrf( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int *info ); void LAPACK_dpstrf( char* uplo, lapack_int* n, double* a, lapack_int* lda, lapack_int* piv, lapack_int* rank, double* tol, double* work, lapack_int *info ); void LAPACK_spstrf( char* uplo, lapack_int* n, float* a, lapack_int* lda, lapack_int* piv, lapack_int* rank, float* tol, float* work, lapack_int *info ); void LAPACK_zpstrf( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* piv, lapack_int* rank, double* tol, double* work, lapack_int *info ); void LAPACK_cpstrf( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int* piv, lapack_int* rank, float* tol, float* work, lapack_int *info ); void LAPACK_dpftrf( char* transr, char* uplo, lapack_int* n, double* a, lapack_int *info ); void LAPACK_spftrf( char* transr, char* uplo, lapack_int* n, float* a, lapack_int *info ); void LAPACK_zpftrf( char* transr, char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int *info ); void LAPACK_cpftrf( char* transr, char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int *info ); void LAPACK_spptrf( char* uplo, lapack_int* n, float* ap, lapack_int *info ); void LAPACK_dpptrf( char* uplo,
lapack_int* n, double* ap, lapack_int *info ); void LAPACK_cpptrf( char* uplo, lapack_int* n, lapack_complex_float* ap, lapack_int *info ); void LAPACK_zpptrf( char* uplo, lapack_int* n, lapack_complex_double* ap, lapack_int *info ); void LAPACK_spbtrf( char* uplo, lapack_int* n, lapack_int* kd, float* ab, lapack_int* ldab, lapack_int *info ); void LAPACK_dpbtrf( char* uplo, lapack_int* n, lapack_int* kd, double* ab, lapack_int* ldab, lapack_int *info ); void LAPACK_cpbtrf( char* uplo, lapack_int* n, lapack_int* kd, lapack_complex_float* ab, lapack_int* ldab, lapack_int *info ); void LAPACK_zpbtrf( char* uplo, lapack_int* n, lapack_int* kd, lapack_complex_double* ab, lapack_int* ldab, lapack_int *info ); void LAPACK_spttrf( lapack_int* n, float* d, float* e, lapack_int *info ); void LAPACK_dpttrf( lapack_int* n, double* d, double* e, lapack_int *info ); void LAPACK_cpttrf( lapack_int* n, float* d, lapack_complex_float* e, lapack_int *info ); void LAPACK_zpttrf( lapack_int* n, double* d, lapack_complex_double* e, lapack_int *info ); void LAPACK_ssytrf( char* uplo, lapack_int* n, float* a, lapack_int* lda, lapack_int* ipiv, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dsytrf( char* uplo, lapack_int* n, double* a, lapack_int* lda, lapack_int* ipiv, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_csytrf( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zsytrf( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_chetrf( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zhetrf( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_double* work,
lapack_int* lwork, lapack_int *info ); void LAPACK_ssptrf( char* uplo, lapack_int* n, float* ap, lapack_int* ipiv, lapack_int *info ); void LAPACK_dsptrf( char* uplo, lapack_int* n, double* ap, lapack_int* ipiv, lapack_int *info ); void LAPACK_csptrf( char* uplo, lapack_int* n, lapack_complex_float* ap, lapack_int* ipiv, lapack_int *info ); void LAPACK_zsptrf( char* uplo, lapack_int* n, lapack_complex_double* ap, lapack_int* ipiv, lapack_int *info ); void LAPACK_chptrf( char* uplo, lapack_int* n, lapack_complex_float* ap, lapack_int* ipiv, lapack_int *info ); void LAPACK_zhptrf( char* uplo, lapack_int* n, lapack_complex_double* ap, lapack_int* ipiv, lapack_int *info ); void LAPACK_sgetrs( char* trans, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, const lapack_int* ipiv, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dgetrs( char* trans, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, const lapack_int* ipiv, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cgetrs( char* trans, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zgetrs( char* trans, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_sgbtrs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const float* ab, lapack_int* ldab, const lapack_int* ipiv, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dgbtrs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const double* ab, lapack_int* ldab, const lapack_int* ipiv, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cgbtrs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const lapack_complex_float* ab, lapack_int* ldab, const lapack_int*
ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zgbtrs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const lapack_complex_double* ab, lapack_int* ldab, const lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_sgttrs( char* trans, lapack_int* n, lapack_int* nrhs, const float* dl, const float* d, const float* du, const float* du2, const lapack_int* ipiv, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dgttrs( char* trans, lapack_int* n, lapack_int* nrhs, const double* dl, const double* d, const double* du, const double* du2, const lapack_int* ipiv, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cgttrs( char* trans, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, const lapack_complex_float* du2, const lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zgttrs( char* trans, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, const lapack_complex_double* du2, const lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_spotrs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dpotrs( char* uplo, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cpotrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zpotrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dpftrs( char* transr, char* uplo, lapack_int* n,
lapack_int* nrhs, const double* a, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_spftrs( char* transr, char* uplo, lapack_int* n, lapack_int* nrhs, const float* a, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zpftrs( char* transr, char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cpftrs( char* transr, char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_spptrs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* ap, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dpptrs( char* uplo, lapack_int* n, lapack_int* nrhs, const double* ap, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cpptrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* ap, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zpptrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* ap, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_spbtrs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const float* ab, lapack_int* ldab, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dpbtrs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const double* ab, lapack_int* ldab, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cpbtrs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const lapack_complex_float* ab, lapack_int* ldab, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zpbtrs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const lapack_complex_double* ab, lapack_int* ldab, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_spttrs( lapack_int* n, lapack_int* nrhs, const float* d, const float* e, float* b, lapack_int* ldb, lapack_int *info );
void LAPACK_dpttrs( lapack_int* n, lapack_int* nrhs, const double* d, const double* e, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cpttrs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* d, const lapack_complex_float* e, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zpttrs( char* uplo, lapack_int* n, lapack_int* nrhs, const double* d, const lapack_complex_double* e, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_ssytrs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, const lapack_int* ipiv, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dsytrs( char* uplo, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, const lapack_int* ipiv, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_csytrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zsytrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_chetrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zhetrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_ssptrs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* ap, const lapack_int* ipiv, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dsptrs( char* uplo, lapack_int* n, lapack_int* nrhs, const double* ap, const lapack_int* ipiv, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_csptrs( char* uplo, lapack_int* n, lapack_int* nrhs, 
const lapack_complex_float* ap, const lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zsptrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* ap, const lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_chptrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* ap, const lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zhptrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* ap, const lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_strtrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dtrtrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_ctrtrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_ztrtrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_stptrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const float* ap, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dtptrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const double* ap, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_ctptrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* ap, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_ztptrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, 
const lapack_complex_double* ap, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_stbtrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const float* ab, lapack_int* ldab, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dtbtrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const double* ab, lapack_int* ldab, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_ctbtrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const lapack_complex_float* ab, lapack_int* ldab, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_ztbtrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const lapack_complex_double* ab, lapack_int* ldab, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_sgecon( char* norm, lapack_int* n, const float* a, lapack_int* lda, float* anorm, float* rcond, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dgecon( char* norm, lapack_int* n, const double* a, lapack_int* lda, double* anorm, double* rcond, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cgecon( char* norm, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* anorm, float* rcond, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zgecon( char* norm, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* anorm, double* rcond, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sgbcon( char* norm, lapack_int* n, lapack_int* kl, lapack_int* ku, const float* ab, lapack_int* ldab, const lapack_int* ipiv, float* anorm, float* rcond, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dgbcon( char* norm, lapack_int* n, lapack_int* kl, lapack_int* ku, const double* ab, lapack_int* ldab, const lapack_int* ipiv, double* anorm, double* rcond, 
double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cgbcon( char* norm, lapack_int* n, lapack_int* kl, lapack_int* ku, const lapack_complex_float* ab, lapack_int* ldab, const lapack_int* ipiv, float* anorm, float* rcond, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zgbcon( char* norm, lapack_int* n, lapack_int* kl, lapack_int* ku, const lapack_complex_double* ab, lapack_int* ldab, const lapack_int* ipiv, double* anorm, double* rcond, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sgtcon( char* norm, lapack_int* n, const float* dl, const float* d, const float* du, const float* du2, const lapack_int* ipiv, float* anorm, float* rcond, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dgtcon( char* norm, lapack_int* n, const double* dl, const double* d, const double* du, const double* du2, const lapack_int* ipiv, double* anorm, double* rcond, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cgtcon( char* norm, lapack_int* n, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, const lapack_complex_float* du2, const lapack_int* ipiv, float* anorm, float* rcond, lapack_complex_float* work, lapack_int *info ); void LAPACK_zgtcon( char* norm, lapack_int* n, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, const lapack_complex_double* du2, const lapack_int* ipiv, double* anorm, double* rcond, lapack_complex_double* work, lapack_int *info ); void LAPACK_spocon( char* uplo, lapack_int* n, const float* a, lapack_int* lda, float* anorm, float* rcond, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dpocon( char* uplo, lapack_int* n, const double* a, lapack_int* lda, double* anorm, double* rcond, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cpocon( char* uplo, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* anorm, float* rcond, 
lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zpocon( char* uplo, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* anorm, double* rcond, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sppcon( char* uplo, lapack_int* n, const float* ap, float* anorm, float* rcond, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dppcon( char* uplo, lapack_int* n, const double* ap, double* anorm, double* rcond, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cppcon( char* uplo, lapack_int* n, const lapack_complex_float* ap, float* anorm, float* rcond, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zppcon( char* uplo, lapack_int* n, const lapack_complex_double* ap, double* anorm, double* rcond, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_spbcon( char* uplo, lapack_int* n, lapack_int* kd, const float* ab, lapack_int* ldab, float* anorm, float* rcond, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dpbcon( char* uplo, lapack_int* n, lapack_int* kd, const double* ab, lapack_int* ldab, double* anorm, double* rcond, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cpbcon( char* uplo, lapack_int* n, lapack_int* kd, const lapack_complex_float* ab, lapack_int* ldab, float* anorm, float* rcond, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zpbcon( char* uplo, lapack_int* n, lapack_int* kd, const lapack_complex_double* ab, lapack_int* ldab, double* anorm, double* rcond, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sptcon( lapack_int* n, const float* d, const float* e, float* anorm, float* rcond, float* work, lapack_int *info ); void LAPACK_dptcon( lapack_int* n, const double* d, const double* e, double* anorm, double* rcond, double* work, lapack_int *info ); void LAPACK_cptcon( lapack_int* n, const float* d, const lapack_complex_float* 
e, float* anorm, float* rcond, float* work, lapack_int *info ); void LAPACK_zptcon( lapack_int* n, const double* d, const lapack_complex_double* e, double* anorm, double* rcond, double* work, lapack_int *info ); void LAPACK_ssycon( char* uplo, lapack_int* n, const float* a, lapack_int* lda, const lapack_int* ipiv, float* anorm, float* rcond, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dsycon( char* uplo, lapack_int* n, const double* a, lapack_int* lda, const lapack_int* ipiv, double* anorm, double* rcond, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_csycon( char* uplo, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, float* anorm, float* rcond, lapack_complex_float* work, lapack_int *info ); void LAPACK_zsycon( char* uplo, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, double* anorm, double* rcond, lapack_complex_double* work, lapack_int *info ); void LAPACK_checon( char* uplo, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, float* anorm, float* rcond, lapack_complex_float* work, lapack_int *info ); void LAPACK_zhecon( char* uplo, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, double* anorm, double* rcond, lapack_complex_double* work, lapack_int *info ); void LAPACK_sspcon( char* uplo, lapack_int* n, const float* ap, const lapack_int* ipiv, float* anorm, float* rcond, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dspcon( char* uplo, lapack_int* n, const double* ap, const lapack_int* ipiv, double* anorm, double* rcond, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cspcon( char* uplo, lapack_int* n, const lapack_complex_float* ap, const lapack_int* ipiv, float* anorm, float* rcond, lapack_complex_float* work, lapack_int *info ); void LAPACK_zspcon( char* uplo, lapack_int* n, const lapack_complex_double* ap, const lapack_int* ipiv, 
double* anorm, double* rcond, lapack_complex_double* work, lapack_int *info ); void LAPACK_chpcon( char* uplo, lapack_int* n, const lapack_complex_float* ap, const lapack_int* ipiv, float* anorm, float* rcond, lapack_complex_float* work, lapack_int *info ); void LAPACK_zhpcon( char* uplo, lapack_int* n, const lapack_complex_double* ap, const lapack_int* ipiv, double* anorm, double* rcond, lapack_complex_double* work, lapack_int *info ); void LAPACK_strcon( char* norm, char* uplo, char* diag, lapack_int* n, const float* a, lapack_int* lda, float* rcond, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dtrcon( char* norm, char* uplo, char* diag, lapack_int* n, const double* a, lapack_int* lda, double* rcond, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_ctrcon( char* norm, char* uplo, char* diag, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* rcond, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_ztrcon( char* norm, char* uplo, char* diag, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* rcond, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_stpcon( char* norm, char* uplo, char* diag, lapack_int* n, const float* ap, float* rcond, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dtpcon( char* norm, char* uplo, char* diag, lapack_int* n, const double* ap, double* rcond, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_ctpcon( char* norm, char* uplo, char* diag, lapack_int* n, const lapack_complex_float* ap, float* rcond, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_ztpcon( char* norm, char* uplo, char* diag, lapack_int* n, const lapack_complex_double* ap, double* rcond, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_stbcon( char* norm, char* uplo, char* diag, lapack_int* n, lapack_int* kd, const float* ab, lapack_int* ldab, float* rcond, float* work, 
lapack_int* iwork, lapack_int *info ); void LAPACK_dtbcon( char* norm, char* uplo, char* diag, lapack_int* n, lapack_int* kd, const double* ab, lapack_int* ldab, double* rcond, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_ctbcon( char* norm, char* uplo, char* diag, lapack_int* n, lapack_int* kd, const lapack_complex_float* ab, lapack_int* ldab, float* rcond, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_ztbcon( char* norm, char* uplo, char* diag, lapack_int* n, lapack_int* kd, const lapack_complex_double* ab, lapack_int* ldab, double* rcond, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sgerfs( char* trans, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, const float* af, lapack_int* ldaf, const lapack_int* ipiv, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dgerfs( char* trans, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, const double* af, lapack_int* ldaf, const lapack_int* ipiv, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cgerfs( char* trans, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* af, lapack_int* ldaf, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zgerfs( char* trans, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* af, lapack_int* ldaf, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); 
void LAPACK_dgerfsx( char* trans, char* equed, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, const double* af, lapack_int* ldaf, const lapack_int* ipiv, const double* r, const double* c, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_sgerfsx( char* trans, char* equed, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, const float* af, lapack_int* ldaf, const lapack_int* ipiv, const float* r, const float* c, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_zgerfsx( char* trans, char* equed, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* af, lapack_int* ldaf, const lapack_int* ipiv, const double* r, const double* c, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_cgerfsx( char* trans, char* equed, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* af, lapack_int* ldaf, const lapack_int* ipiv, const float* r, const float* c, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_sgbrfs( char* trans, lapack_int* 
n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const float* ab, lapack_int* ldab, const float* afb, lapack_int* ldafb, const lapack_int* ipiv, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dgbrfs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const double* ab, lapack_int* ldab, const double* afb, lapack_int* ldafb, const lapack_int* ipiv, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cgbrfs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const lapack_complex_float* ab, lapack_int* ldab, const lapack_complex_float* afb, lapack_int* ldafb, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zgbrfs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const lapack_complex_double* ab, lapack_int* ldab, const lapack_complex_double* afb, lapack_int* ldafb, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_dgbrfsx( char* trans, char* equed, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const double* ab, lapack_int* ldab, const double* afb, lapack_int* ldafb, const lapack_int* ipiv, const double* r, const double* c, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_sgbrfsx( char* trans, char* equed, lapack_int* n, 
lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const float* ab, lapack_int* ldab, const float* afb, lapack_int* ldafb, const lapack_int* ipiv, const float* r, const float* c, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_zgbrfsx( char* trans, char* equed, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const lapack_complex_double* ab, lapack_int* ldab, const lapack_complex_double* afb, lapack_int* ldafb, const lapack_int* ipiv, const double* r, const double* c, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_cgbrfsx( char* trans, char* equed, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const lapack_complex_float* ab, lapack_int* ldab, const lapack_complex_float* afb, lapack_int* ldafb, const lapack_int* ipiv, const float* r, const float* c, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_sgtrfs( char* trans, lapack_int* n, lapack_int* nrhs, const float* dl, const float* d, const float* du, const float* dlf, const float* df, const float* duf, const float* du2, const lapack_int* ipiv, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dgtrfs( char* trans, lapack_int* n, lapack_int* nrhs, const double* dl, const double* d, const 
double* du, const double* dlf, const double* df, const double* duf, const double* du2, const lapack_int* ipiv, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cgtrfs( char* trans, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, const lapack_complex_float* dlf, const lapack_complex_float* df, const lapack_complex_float* duf, const lapack_complex_float* du2, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zgtrfs( char* trans, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, const lapack_complex_double* dlf, const lapack_complex_double* df, const lapack_complex_double* duf, const lapack_complex_double* du2, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sporfs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, const float* af, lapack_int* ldaf, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dporfs( char* uplo, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, const double* af, lapack_int* ldaf, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cporfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* af, lapack_int* ldaf, const lapack_complex_float* b, 
lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zporfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* af, lapack_int* ldaf, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_dporfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, const double* af, lapack_int* ldaf, const double* s, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_sporfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, const float* af, lapack_int* ldaf, const float* s, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_zporfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* af, lapack_int* ldaf, const double* s, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_cporfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* af, lapack_int* ldaf, const float* s, 
const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_spprfs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* ap, const float* afp, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dpprfs( char* uplo, lapack_int* n, lapack_int* nrhs, const double* ap, const double* afp, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cpprfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* ap, const lapack_complex_float* afp, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zpprfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* ap, const lapack_complex_double* afp, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_spbrfs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const float* ab, lapack_int* ldab, const float* afb, lapack_int* ldafb, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dpbrfs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const double* ab, lapack_int* ldab, const double* afb, lapack_int* ldafb, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cpbrfs( 
char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const lapack_complex_float* ab, lapack_int* ldab, const lapack_complex_float* afb, lapack_int* ldafb, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zpbrfs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const lapack_complex_double* ab, lapack_int* ldab, const lapack_complex_double* afb, lapack_int* ldafb, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sptrfs( lapack_int* n, lapack_int* nrhs, const float* d, const float* e, const float* df, const float* ef, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int *info ); void LAPACK_dptrfs( lapack_int* n, lapack_int* nrhs, const double* d, const double* e, const double* df, const double* ef, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int *info ); void LAPACK_cptrfs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* d, const lapack_complex_float* e, const float* df, const lapack_complex_float* ef, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zptrfs( char* uplo, lapack_int* n, lapack_int* nrhs, const double* d, const lapack_complex_double* e, const double* df, const lapack_complex_double* ef, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_ssyrfs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, const float* af, 
lapack_int* ldaf, const lapack_int* ipiv, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dsyrfs( char* uplo, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, const double* af, lapack_int* ldaf, const lapack_int* ipiv, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_csyrfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* af, lapack_int* ldaf, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zsyrfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* af, lapack_int* ldaf, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_dsyrfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, const double* af, lapack_int* ldaf, const lapack_int* ipiv, const double* s, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_ssyrfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, const float* af, lapack_int* ldaf, const lapack_int* ipiv, const float* s, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* 
err_bnds_comp, lapack_int* nparams, float* params, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_zsyrfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* af, lapack_int* ldaf, const lapack_int* ipiv, const double* s, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_csyrfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* af, lapack_int* ldaf, const lapack_int* ipiv, const float* s, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_cherfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* af, lapack_int* ldaf, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zherfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* af, lapack_int* ldaf, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_zherfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const 
lapack_complex_double* af, lapack_int* ldaf, const lapack_int* ipiv, const double* s, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_cherfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* af, lapack_int* ldaf, const lapack_int* ipiv, const float* s, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_ssprfs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* ap, const float* afp, const lapack_int* ipiv, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dsprfs( char* uplo, lapack_int* n, lapack_int* nrhs, const double* ap, const double* afp, const lapack_int* ipiv, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_csprfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* ap, const lapack_complex_float* afp, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zsprfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* ap, const lapack_complex_double* afp, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* 
ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_chprfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* ap, const lapack_complex_float* afp, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zhprfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* ap, const lapack_complex_double* afp, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_strrfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, const float* b, lapack_int* ldb, const float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dtrrfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, const double* b, lapack_int* ldb, const double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_ctrrfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* b, lapack_int* ldb, const lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_ztrrfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* b, lapack_int* ldb, const lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_stprfs( char* uplo, char* trans, char* 
diag, lapack_int* n, lapack_int* nrhs, const float* ap, const float* b, lapack_int* ldb, const float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dtprfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const double* ap, const double* b, lapack_int* ldb, const double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_ctprfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* ap, const lapack_complex_float* b, lapack_int* ldb, const lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_ztprfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* ap, const lapack_complex_double* b, lapack_int* ldb, const lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_stbrfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const float* ab, lapack_int* ldab, const float* b, lapack_int* ldb, const float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dtbrfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const double* ab, lapack_int* ldab, const double* b, lapack_int* ldb, const double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_ctbrfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const lapack_complex_float* ab, lapack_int* ldab, const lapack_complex_float* b, lapack_int* ldb, const lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void 
LAPACK_ztbrfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const lapack_complex_double* ab, lapack_int* ldab, const lapack_complex_double* b, lapack_int* ldb, const lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sgetri( lapack_int* n, float* a, lapack_int* lda, const lapack_int* ipiv, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgetri( lapack_int* n, double* a, lapack_int* lda, const lapack_int* ipiv, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgetri( lapack_int* n, lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zgetri( lapack_int* n, lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_spotri( char* uplo, lapack_int* n, float* a, lapack_int* lda, lapack_int *info ); void LAPACK_dpotri( char* uplo, lapack_int* n, double* a, lapack_int* lda, lapack_int *info ); void LAPACK_cpotri( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int *info ); void LAPACK_zpotri( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int *info ); void LAPACK_dpftri( char* transr, char* uplo, lapack_int* n, double* a, lapack_int *info ); void LAPACK_spftri( char* transr, char* uplo, lapack_int* n, float* a, lapack_int *info ); void LAPACK_zpftri( char* transr, char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int *info ); void LAPACK_cpftri( char* transr, char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int *info ); void LAPACK_spptri( char* uplo, lapack_int* n, float* ap, lapack_int *info ); void LAPACK_dpptri( char* uplo, lapack_int* n, double* ap, lapack_int *info ); void LAPACK_cpptri( char* uplo, lapack_int* n, lapack_complex_float* ap, 
lapack_int *info ); void LAPACK_zpptri( char* uplo, lapack_int* n, lapack_complex_double* ap, lapack_int *info ); void LAPACK_ssytri( char* uplo, lapack_int* n, float* a, lapack_int* lda, const lapack_int* ipiv, float* work, lapack_int *info ); void LAPACK_dsytri( char* uplo, lapack_int* n, double* a, lapack_int* lda, const lapack_int* ipiv, double* work, lapack_int *info ); void LAPACK_csytri( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int *info ); void LAPACK_zsytri( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int *info ); void LAPACK_chetri( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int *info ); void LAPACK_zhetri( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int *info ); void LAPACK_ssptri( char* uplo, lapack_int* n, float* ap, const lapack_int* ipiv, float* work, lapack_int *info ); void LAPACK_dsptri( char* uplo, lapack_int* n, double* ap, const lapack_int* ipiv, double* work, lapack_int *info ); void LAPACK_csptri( char* uplo, lapack_int* n, lapack_complex_float* ap, const lapack_int* ipiv, lapack_complex_float* work, lapack_int *info ); void LAPACK_zsptri( char* uplo, lapack_int* n, lapack_complex_double* ap, const lapack_int* ipiv, lapack_complex_double* work, lapack_int *info ); void LAPACK_chptri( char* uplo, lapack_int* n, lapack_complex_float* ap, const lapack_int* ipiv, lapack_complex_float* work, lapack_int *info ); void LAPACK_zhptri( char* uplo, lapack_int* n, lapack_complex_double* ap, const lapack_int* ipiv, lapack_complex_double* work, lapack_int *info ); void LAPACK_strtri( char* uplo, char* diag, lapack_int* n, float* a, lapack_int* lda, lapack_int *info ); void LAPACK_dtrtri( char* uplo, char* diag, 
lapack_int* n, double* a, lapack_int* lda, lapack_int *info ); void LAPACK_ctrtri( char* uplo, char* diag, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int *info ); void LAPACK_ztrtri( char* uplo, char* diag, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int *info ); void LAPACK_dtftri( char* transr, char* uplo, char* diag, lapack_int* n, double* a, lapack_int *info ); void LAPACK_stftri( char* transr, char* uplo, char* diag, lapack_int* n, float* a, lapack_int *info ); void LAPACK_ztftri( char* transr, char* uplo, char* diag, lapack_int* n, lapack_complex_double* a, lapack_int *info ); void LAPACK_ctftri( char* transr, char* uplo, char* diag, lapack_int* n, lapack_complex_float* a, lapack_int *info ); void LAPACK_stptri( char* uplo, char* diag, lapack_int* n, float* ap, lapack_int *info ); void LAPACK_dtptri( char* uplo, char* diag, lapack_int* n, double* ap, lapack_int *info ); void LAPACK_ctptri( char* uplo, char* diag, lapack_int* n, lapack_complex_float* ap, lapack_int *info ); void LAPACK_ztptri( char* uplo, char* diag, lapack_int* n, lapack_complex_double* ap, lapack_int *info ); void LAPACK_sgeequ( lapack_int* m, lapack_int* n, const float* a, lapack_int* lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax, lapack_int *info ); void LAPACK_dgeequ( lapack_int* m, lapack_int* n, const double* a, lapack_int* lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax, lapack_int *info ); void LAPACK_cgeequ( lapack_int* m, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax, lapack_int *info ); void LAPACK_zgeequ( lapack_int* m, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax, lapack_int *info ); void LAPACK_dgeequb( lapack_int* m, lapack_int* n, const double* a, lapack_int* lda, double* r, double* c, double* rowcnd, double* colcnd, double* 
amax, lapack_int *info ); void LAPACK_sgeequb( lapack_int* m, lapack_int* n, const float* a, lapack_int* lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax, lapack_int *info ); void LAPACK_zgeequb( lapack_int* m, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax, lapack_int *info ); void LAPACK_cgeequb( lapack_int* m, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax, lapack_int *info ); void LAPACK_sgbequ( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const float* ab, lapack_int* ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax, lapack_int *info ); void LAPACK_dgbequ( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const double* ab, lapack_int* ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax, lapack_int *info ); void LAPACK_cgbequ( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const lapack_complex_float* ab, lapack_int* ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax, lapack_int *info ); void LAPACK_zgbequ( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const lapack_complex_double* ab, lapack_int* ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax, lapack_int *info ); void LAPACK_dgbequb( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const double* ab, lapack_int* ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax, lapack_int *info ); void LAPACK_sgbequb( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const float* ab, lapack_int* ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax, lapack_int *info ); void LAPACK_zgbequb( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const lapack_complex_double* ab, lapack_int* ldab, double* r, double* c, double* rowcnd, double* 
colcnd, double* amax, lapack_int *info ); void LAPACK_cgbequb( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const lapack_complex_float* ab, lapack_int* ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax, lapack_int *info ); void LAPACK_spoequ( lapack_int* n, const float* a, lapack_int* lda, float* s, float* scond, float* amax, lapack_int *info ); void LAPACK_dpoequ( lapack_int* n, const double* a, lapack_int* lda, double* s, double* scond, double* amax, lapack_int *info ); void LAPACK_cpoequ( lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* s, float* scond, float* amax, lapack_int *info ); void LAPACK_zpoequ( lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* s, double* scond, double* amax, lapack_int *info ); void LAPACK_dpoequb( lapack_int* n, const double* a, lapack_int* lda, double* s, double* scond, double* amax, lapack_int *info ); void LAPACK_spoequb( lapack_int* n, const float* a, lapack_int* lda, float* s, float* scond, float* amax, lapack_int *info ); void LAPACK_zpoequb( lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* s, double* scond, double* amax, lapack_int *info ); void LAPACK_cpoequb( lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* s, float* scond, float* amax, lapack_int *info ); void LAPACK_sppequ( char* uplo, lapack_int* n, const float* ap, float* s, float* scond, float* amax, lapack_int *info ); void LAPACK_dppequ( char* uplo, lapack_int* n, const double* ap, double* s, double* scond, double* amax, lapack_int *info ); void LAPACK_cppequ( char* uplo, lapack_int* n, const lapack_complex_float* ap, float* s, float* scond, float* amax, lapack_int *info ); void LAPACK_zppequ( char* uplo, lapack_int* n, const lapack_complex_double* ap, double* s, double* scond, double* amax, lapack_int *info ); void LAPACK_spbequ( char* uplo, lapack_int* n, lapack_int* kd, const float* ab, lapack_int* ldab, float* s, float* scond, float* amax, 
lapack_int *info ); void LAPACK_dpbequ( char* uplo, lapack_int* n, lapack_int* kd, const double* ab, lapack_int* ldab, double* s, double* scond, double* amax, lapack_int *info ); void LAPACK_cpbequ( char* uplo, lapack_int* n, lapack_int* kd, const lapack_complex_float* ab, lapack_int* ldab, float* s, float* scond, float* amax, lapack_int *info ); void LAPACK_zpbequ( char* uplo, lapack_int* n, lapack_int* kd, const lapack_complex_double* ab, lapack_int* ldab, double* s, double* scond, double* amax, lapack_int *info ); void LAPACK_dsyequb( char* uplo, lapack_int* n, const double* a, lapack_int* lda, double* s, double* scond, double* amax, double* work, lapack_int *info ); void LAPACK_ssyequb( char* uplo, lapack_int* n, const float* a, lapack_int* lda, float* s, float* scond, float* amax, float* work, lapack_int *info ); void LAPACK_zsyequb( char* uplo, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* s, double* scond, double* amax, lapack_complex_double* work, lapack_int *info ); void LAPACK_csyequb( char* uplo, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* s, float* scond, float* amax, lapack_complex_float* work, lapack_int *info ); void LAPACK_zheequb( char* uplo, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* s, double* scond, double* amax, lapack_complex_double* work, lapack_int *info ); void LAPACK_cheequb( char* uplo, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* s, float* scond, float* amax, lapack_complex_float* work, lapack_int *info ); void LAPACK_sgesv( lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, lapack_int* ipiv, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dgesv( lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, lapack_int* ipiv, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cgesv( lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_float* b, 
lapack_int* ldb, lapack_int *info ); void LAPACK_zgesv( lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dsgesv( lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, lapack_int* ipiv, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* work, float* swork, lapack_int* iter, lapack_int *info ); void LAPACK_zcgesv( lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, lapack_complex_double* work, lapack_complex_float* swork, double* rwork, lapack_int* iter, lapack_int *info ); void LAPACK_sgesvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, float* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dgesvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, double* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cgesvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zgesvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* af, lapack_int* ldaf, lapack_int* 
ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_dgesvxx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, double* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* rpvgrw, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_sgesvxx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, float* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* rpvgrw, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_zgesvxx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* rpvgrw, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_cgesvxx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* rpvgrw, float* berr, lapack_int* 
n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_sgbsv( lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, float* ab, lapack_int* ldab, lapack_int* ipiv, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dgbsv( lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, double* ab, lapack_int* ldab, lapack_int* ipiv, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cgbsv( lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, lapack_complex_float* ab, lapack_int* ldab, lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zgbsv( lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, lapack_complex_double* ab, lapack_int* ldab, lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_sgbsvx( char* fact, char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, float* ab, lapack_int* ldab, float* afb, lapack_int* ldafb, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dgbsvx( char* fact, char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, double* ab, lapack_int* ldab, double* afb, lapack_int* ldafb, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cgbsvx( char* fact, char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, lapack_complex_float* ab, lapack_int* ldab, lapack_complex_float* afb, lapack_int* ldafb, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, 
lapack_int* ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zgbsvx( char* fact, char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, lapack_complex_double* ab, lapack_int* ldab, lapack_complex_double* afb, lapack_int* ldafb, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_dgbsvxx( char* fact, char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, double* ab, lapack_int* ldab, double* afb, lapack_int* ldafb, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* rpvgrw, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_sgbsvxx( char* fact, char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, float* ab, lapack_int* ldab, float* afb, lapack_int* ldafb, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* rpvgrw, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_zgbsvxx( char* fact, char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, lapack_complex_double* ab, lapack_int* ldab, lapack_complex_double* afb, lapack_int* ldafb, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* rpvgrw, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* 
nparams, double* params, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_cgbsvxx( char* fact, char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, lapack_complex_float* ab, lapack_int* ldab, lapack_complex_float* afb, lapack_int* ldafb, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* rpvgrw, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_sgtsv( lapack_int* n, lapack_int* nrhs, float* dl, float* d, float* du, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dgtsv( lapack_int* n, lapack_int* nrhs, double* dl, double* d, double* du, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cgtsv( lapack_int* n, lapack_int* nrhs, lapack_complex_float* dl, lapack_complex_float* d, lapack_complex_float* du, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zgtsv( lapack_int* n, lapack_int* nrhs, lapack_complex_double* dl, lapack_complex_double* d, lapack_complex_double* du, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_sgtsvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, const float* dl, const float* d, const float* du, float* dlf, float* df, float* duf, float* du2, lapack_int* ipiv, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dgtsvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, const double* dl, const double* d, const double* du, double* dlf, double* df, double* duf, double* du2, lapack_int* ipiv, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void 
LAPACK_cgtsvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, lapack_complex_float* dlf, lapack_complex_float* df, lapack_complex_float* duf, lapack_complex_float* du2, lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zgtsvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, lapack_complex_double* dlf, lapack_complex_double* df, lapack_complex_double* duf, lapack_complex_double* du2, lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sposv( char* uplo, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dposv( char* uplo, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cposv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zposv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dsposv( char* uplo, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* work, float* swork, lapack_int* iter, lapack_int *info ); void LAPACK_zcposv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, 
lapack_int* ldx, lapack_complex_double* work, lapack_complex_float* swork, double* rwork, lapack_int* iter, lapack_int *info ); void LAPACK_sposvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, float* af, lapack_int* ldaf, char* equed, float* s, float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dposvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, double* af, lapack_int* ldaf, char* equed, double* s, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cposvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* af, lapack_int* ldaf, char* equed, float* s, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zposvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* af, lapack_int* ldaf, char* equed, double* s, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_dposvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, double* af, lapack_int* ldaf, char* equed, double* s, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* rpvgrw, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_sposvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, 
float* a, lapack_int* lda, float* af, lapack_int* ldaf, char* equed, float* s, float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* rpvgrw, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_zposvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* af, lapack_int* ldaf, char* equed, double* s, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* rpvgrw, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_cposvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* af, lapack_int* ldaf, char* equed, float* s, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* rpvgrw, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_sppsv( char* uplo, lapack_int* n, lapack_int* nrhs, float* ap, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dppsv( char* uplo, lapack_int* n, lapack_int* nrhs, double* ap, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cppsv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* ap, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zppsv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* ap, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_sppsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, float* ap, float* afp, char* equed, float* s, float* b, lapack_int* 
ldb, float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dppsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, double* ap, double* afp, char* equed, double* s, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cppsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* ap, lapack_complex_float* afp, char* equed, float* s, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zppsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* ap, lapack_complex_double* afp, char* equed, double* s, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_spbsv( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, float* ab, lapack_int* ldab, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dpbsv( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, double* ab, lapack_int* ldab, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cpbsv( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, lapack_complex_float* ab, lapack_int* ldab, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zpbsv( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, lapack_complex_double* ab, lapack_int* ldab, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_spbsvx( char* fact, char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, float* ab, lapack_int* ldab, float* afb, lapack_int* ldafb, char* equed, float* s, float* b, lapack_int* ldb, float* x, 
lapack_int* ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dpbsvx( char* fact, char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, double* ab, lapack_int* ldab, double* afb, lapack_int* ldafb, char* equed, double* s, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cpbsvx( char* fact, char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, lapack_complex_float* ab, lapack_int* ldab, lapack_complex_float* afb, lapack_int* ldafb, char* equed, float* s, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zpbsvx( char* fact, char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, lapack_complex_double* ab, lapack_int* ldab, lapack_complex_double* afb, lapack_int* ldafb, char* equed, double* s, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sptsv( lapack_int* n, lapack_int* nrhs, float* d, float* e, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dptsv( lapack_int* n, lapack_int* nrhs, double* d, double* e, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cptsv( lapack_int* n, lapack_int* nrhs, float* d, lapack_complex_float* e, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zptsv( lapack_int* n, lapack_int* nrhs, double* d, lapack_complex_double* e, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_sptsvx( char* fact, lapack_int* n, lapack_int* nrhs, const float* d, const float* e, float* df, float* ef, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, 
float* work, lapack_int *info ); void LAPACK_dptsvx( char* fact, lapack_int* n, lapack_int* nrhs, const double* d, const double* e, double* df, double* ef, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int *info ); void LAPACK_cptsvx( char* fact, lapack_int* n, lapack_int* nrhs, const float* d, const lapack_complex_float* e, float* df, lapack_complex_float* ef, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zptsvx( char* fact, lapack_int* n, lapack_int* nrhs, const double* d, const lapack_complex_double* e, double* df, lapack_complex_double* ef, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_ssysv( char* uplo, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, lapack_int* ipiv, float* b, lapack_int* ldb, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dsysv( char* uplo, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, lapack_int* ipiv, double* b, lapack_int* ldb, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_csysv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zsysv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_ssysvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, float* af, lapack_int* ldaf, lapack_int* ipiv, const 
float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_dsysvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, double* af, lapack_int* ldaf, lapack_int* ipiv, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_csysvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, lapack_complex_float* af, lapack_int* ldaf, lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zsysvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, lapack_complex_double* af, lapack_int* ldaf, lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_dsysvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, double* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, double* s, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* rpvgrw, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_ssysvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, float* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, float* s, float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* 
rpvgrw, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_zsysvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, double* s, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* rpvgrw, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_csysvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, float* s, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* rpvgrw, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_chesv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zhesv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_chesvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, lapack_complex_float* af, lapack_int* ldaf, lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, 
lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zhesvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, lapack_complex_double* af, lapack_int* ldaf, lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_zhesvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, double* s, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* rpvgrw, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_chesvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, float* s, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* rpvgrw, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_sspsv( char* uplo, lapack_int* n, lapack_int* nrhs, float* ap, lapack_int* ipiv, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dspsv( char* uplo, lapack_int* n, lapack_int* nrhs, double* ap, lapack_int* ipiv, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cspsv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* ap, lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zspsv( char* uplo, lapack_int* n, lapack_int* nrhs, 
lapack_complex_double* ap, lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_sspsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const float* ap, float* afp, lapack_int* ipiv, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dspsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const double* ap, double* afp, lapack_int* ipiv, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cspsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* ap, lapack_complex_float* afp, lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zspsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* ap, lapack_complex_double* afp, lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_chpsv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* ap, lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zhpsv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* ap, lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_chpsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* ap, lapack_complex_float* afp, lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, 
lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zhpsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* ap, lapack_complex_double* afp, lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sgeqrf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgeqrf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgeqrf( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zgeqrf( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sgeqpf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, lapack_int* jpvt, float* tau, float* work, lapack_int *info ); void LAPACK_dgeqpf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, lapack_int* jpvt, double* tau, double* work, lapack_int *info ); void LAPACK_cgeqpf( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int* jpvt, lapack_complex_float* tau, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zgeqpf( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* jpvt, lapack_complex_double* tau, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sgeqp3( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, lapack_int* jpvt, float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgeqp3( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, 
lapack_int* jpvt, double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgeqp3( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int* jpvt, lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zgeqp3( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* jpvt, lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_sorgqr( lapack_int* m, lapack_int* n, lapack_int* k, float* a, lapack_int* lda, const float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dorgqr( lapack_int* m, lapack_int* n, lapack_int* k, double* a, lapack_int* lda, const double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sormqr( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const float* a, lapack_int* lda, const float* tau, float* c, lapack_int* ldc, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dormqr( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const double* a, lapack_int* lda, const double* tau, double* c, lapack_int* ldc, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cungqr( lapack_int* m, lapack_int* n, lapack_int* k, lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zungqr( lapack_int* m, lapack_int* n, lapack_int* k, lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cunmqr( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void 
LAPACK_zunmqr( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sgelqf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgelqf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgelqf( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zgelqf( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sorglq( lapack_int* m, lapack_int* n, lapack_int* k, float* a, lapack_int* lda, const float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dorglq( lapack_int* m, lapack_int* n, lapack_int* k, double* a, lapack_int* lda, const double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sormlq( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const float* a, lapack_int* lda, const float* tau, float* c, lapack_int* ldc, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dormlq( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const double* a, lapack_int* lda, const double* tau, double* c, lapack_int* ldc, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cunglq( lapack_int* m, lapack_int* n, lapack_int* k, lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zunglq( lapack_int* m, lapack_int* n, lapack_int* k, 
lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cunmlq( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zunmlq( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sgeqlf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgeqlf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgeqlf( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zgeqlf( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sorgql( lapack_int* m, lapack_int* n, lapack_int* k, float* a, lapack_int* lda, const float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dorgql( lapack_int* m, lapack_int* n, lapack_int* k, double* a, lapack_int* lda, const double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cungql( lapack_int* m, lapack_int* n, lapack_int* k, lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zungql( lapack_int* m, lapack_int* n, lapack_int* k, lapack_complex_double* a, 
lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sormql( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const float* a, lapack_int* lda, const float* tau, float* c, lapack_int* ldc, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dormql( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const double* a, lapack_int* lda, const double* tau, double* c, lapack_int* ldc, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cunmql( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zunmql( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sgerqf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgerqf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgerqf( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zgerqf( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sorgrq( lapack_int* m, lapack_int* n, lapack_int* k, float* a, lapack_int* lda, const float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dorgrq( lapack_int* m, lapack_int* n, 
lapack_int* k, double* a, lapack_int* lda, const double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cungrq( lapack_int* m, lapack_int* n, lapack_int* k, lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zungrq( lapack_int* m, lapack_int* n, lapack_int* k, lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sormrq( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const float* a, lapack_int* lda, const float* tau, float* c, lapack_int* ldc, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dormrq( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const double* a, lapack_int* lda, const double* tau, double* c, lapack_int* ldc, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cunmrq( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zunmrq( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_stzrzf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dtzrzf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_ctzrzf( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, 
lapack_int *info ); void LAPACK_ztzrzf( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sormrz( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, const float* a, lapack_int* lda, const float* tau, float* c, lapack_int* ldc, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dormrz( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, const double* a, lapack_int* lda, const double* tau, double* c, lapack_int* ldc, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cunmrz( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zunmrz( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sggqrf( lapack_int* n, lapack_int* m, lapack_int* p, float* a, lapack_int* lda, float* taua, float* b, lapack_int* ldb, float* taub, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dggqrf( lapack_int* n, lapack_int* m, lapack_int* p, double* a, lapack_int* lda, double* taua, double* b, lapack_int* ldb, double* taub, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cggqrf( lapack_int* n, lapack_int* m, lapack_int* p, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* taua, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* taub, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zggqrf( lapack_int* n, lapack_int* m, 
lapack_int* p, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* taua, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* taub, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sggrqf( lapack_int* m, lapack_int* p, lapack_int* n, float* a, lapack_int* lda, float* taua, float* b, lapack_int* ldb, float* taub, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dggrqf( lapack_int* m, lapack_int* p, lapack_int* n, double* a, lapack_int* lda, double* taua, double* b, lapack_int* ldb, double* taub, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cggrqf( lapack_int* m, lapack_int* p, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* taua, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* taub, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zggrqf( lapack_int* m, lapack_int* p, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* taua, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* taub, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sgebrd( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* d, float* e, float* tauq, float* taup, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgebrd( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* d, double* e, double* tauq, double* taup, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgebrd( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, float* d, float* e, lapack_complex_float* tauq, lapack_complex_float* taup, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zgebrd( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, double* d, double* e, lapack_complex_double* tauq, lapack_complex_double* taup, lapack_complex_double* work, lapack_int* lwork, lapack_int 
*info ); void LAPACK_sgbbrd( char* vect, lapack_int* m, lapack_int* n, lapack_int* ncc, lapack_int* kl, lapack_int* ku, float* ab, lapack_int* ldab, float* d, float* e, float* q, lapack_int* ldq, float* pt, lapack_int* ldpt, float* c, lapack_int* ldc, float* work, lapack_int *info ); void LAPACK_dgbbrd( char* vect, lapack_int* m, lapack_int* n, lapack_int* ncc, lapack_int* kl, lapack_int* ku, double* ab, lapack_int* ldab, double* d, double* e, double* q, lapack_int* ldq, double* pt, lapack_int* ldpt, double* c, lapack_int* ldc, double* work, lapack_int *info ); void LAPACK_cgbbrd( char* vect, lapack_int* m, lapack_int* n, lapack_int* ncc, lapack_int* kl, lapack_int* ku, lapack_complex_float* ab, lapack_int* ldab, float* d, float* e, lapack_complex_float* q, lapack_int* ldq, lapack_complex_float* pt, lapack_int* ldpt, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zgbbrd( char* vect, lapack_int* m, lapack_int* n, lapack_int* ncc, lapack_int* kl, lapack_int* ku, lapack_complex_double* ab, lapack_int* ldab, double* d, double* e, lapack_complex_double* q, lapack_int* ldq, lapack_complex_double* pt, lapack_int* ldpt, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sorgbr( char* vect, lapack_int* m, lapack_int* n, lapack_int* k, float* a, lapack_int* lda, const float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dorgbr( char* vect, lapack_int* m, lapack_int* n, lapack_int* k, double* a, lapack_int* lda, const double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sormbr( char* vect, char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const float* a, lapack_int* lda, const float* tau, float* c, lapack_int* ldc, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dormbr( char* vect, char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const 
double* a, lapack_int* lda, const double* tau, double* c, lapack_int* ldc, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cungbr( char* vect, lapack_int* m, lapack_int* n, lapack_int* k, lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zungbr( char* vect, lapack_int* m, lapack_int* n, lapack_int* k, lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cunmbr( char* vect, char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zunmbr( char* vect, char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sbdsqr( char* uplo, lapack_int* n, lapack_int* ncvt, lapack_int* nru, lapack_int* ncc, float* d, float* e, float* vt, lapack_int* ldvt, float* u, lapack_int* ldu, float* c, lapack_int* ldc, float* work, lapack_int *info ); void LAPACK_dbdsqr( char* uplo, lapack_int* n, lapack_int* ncvt, lapack_int* nru, lapack_int* ncc, double* d, double* e, double* vt, lapack_int* ldvt, double* u, lapack_int* ldu, double* c, lapack_int* ldc, double* work, lapack_int *info ); void LAPACK_cbdsqr( char* uplo, lapack_int* n, lapack_int* ncvt, lapack_int* nru, lapack_int* ncc, float* d, float* e, lapack_complex_float* vt, lapack_int* ldvt, lapack_complex_float* u, lapack_int* ldu, lapack_complex_float* c, lapack_int* ldc, float* work, lapack_int *info ); void LAPACK_zbdsqr( char* uplo, lapack_int* n, lapack_int* ncvt, lapack_int* nru, 
lapack_int* ncc, double* d, double* e, lapack_complex_double* vt, lapack_int* ldvt, lapack_complex_double* u, lapack_int* ldu, lapack_complex_double* c, lapack_int* ldc, double* work, lapack_int *info );
/* NOTE(review): Verbatim Fortran-LAPACK prototypes (LAPACKE naming convention).
 * These signatures must match the Fortran LAPACK ABI exactly: every argument is
 * passed by pointer (Fortran pass-by-reference), and `info` receives the routine
 * status (0 = success, <0 = bad argument, >0 = routine-specific failure).
 * Do NOT edit argument types, order, or const-ness here.
 * Tridiagonal reduction and back-transformation (sytrd/hetrd, orgtr/ungtr,
 * ormtr/unmtr, packed sptrd/hptrd variants, banded sbtrd/hbtrd) follow. */
void LAPACK_sbdsdc( char* uplo, char* compq, lapack_int* n, float* d, float* e, float* u, lapack_int* ldu, float* vt, lapack_int* ldvt, float* q, lapack_int* iq, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dbdsdc( char* uplo, char* compq, lapack_int* n, double* d, double* e, double* u, lapack_int* ldu, double* vt, lapack_int* ldvt, double* q, lapack_int* iq, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_ssytrd( char* uplo, lapack_int* n, float* a, lapack_int* lda, float* d, float* e, float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dsytrd( char* uplo, lapack_int* n, double* a, lapack_int* lda, double* d, double* e, double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sorgtr( char* uplo, lapack_int* n, float* a, lapack_int* lda, const float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dorgtr( char* uplo, lapack_int* n, double* a, lapack_int* lda, const double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sormtr( char* side, char* uplo, char* trans, lapack_int* m, lapack_int* n, const float* a, lapack_int* lda, const float* tau, float* c, lapack_int* ldc, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dormtr( char* side, char* uplo, char* trans, lapack_int* m, lapack_int* n, const double* a, lapack_int* lda, const double* tau, double* c, lapack_int* ldc, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_chetrd( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, float* d, float* e, lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zhetrd( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, double* d, double*
e, lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cungtr( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zungtr( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cunmtr( char* side, char* uplo, char* trans, lapack_int* m, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zunmtr( char* side, char* uplo, char* trans, lapack_int* m, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_ssptrd( char* uplo, lapack_int* n, float* ap, float* d, float* e, float* tau, lapack_int *info ); void LAPACK_dsptrd( char* uplo, lapack_int* n, double* ap, double* d, double* e, double* tau, lapack_int *info ); void LAPACK_sopgtr( char* uplo, lapack_int* n, const float* ap, const float* tau, float* q, lapack_int* ldq, float* work, lapack_int *info ); void LAPACK_dopgtr( char* uplo, lapack_int* n, const double* ap, const double* tau, double* q, lapack_int* ldq, double* work, lapack_int *info ); void LAPACK_sopmtr( char* side, char* uplo, char* trans, lapack_int* m, lapack_int* n, const float* ap, const float* tau, float* c, lapack_int* ldc, float* work, lapack_int *info ); void LAPACK_dopmtr( char* side, char* uplo, char* trans, lapack_int* m, lapack_int* n, const double* ap, const double* tau, double* c, lapack_int* ldc, double* work, lapack_int *info ); void LAPACK_chptrd( char* uplo, lapack_int* n, lapack_complex_float* ap, float*
d, float* e, lapack_complex_float* tau, lapack_int *info ); void LAPACK_zhptrd( char* uplo, lapack_int* n, lapack_complex_double* ap, double* d, double* e, lapack_complex_double* tau, lapack_int *info ); void LAPACK_cupgtr( char* uplo, lapack_int* n, const lapack_complex_float* ap, const lapack_complex_float* tau, lapack_complex_float* q, lapack_int* ldq, lapack_complex_float* work, lapack_int *info ); void LAPACK_zupgtr( char* uplo, lapack_int* n, const lapack_complex_double* ap, const lapack_complex_double* tau, lapack_complex_double* q, lapack_int* ldq, lapack_complex_double* work, lapack_int *info ); void LAPACK_cupmtr( char* side, char* uplo, char* trans, lapack_int* m, lapack_int* n, const lapack_complex_float* ap, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int *info ); void LAPACK_zupmtr( char* side, char* uplo, char* trans, lapack_int* m, lapack_int* n, const lapack_complex_double* ap, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int *info ); void LAPACK_ssbtrd( char* vect, char* uplo, lapack_int* n, lapack_int* kd, float* ab, lapack_int* ldab, float* d, float* e, float* q, lapack_int* ldq, float* work, lapack_int *info ); void LAPACK_dsbtrd( char* vect, char* uplo, lapack_int* n, lapack_int* kd, double* ab, lapack_int* ldab, double* d, double* e, double* q, lapack_int* ldq, double* work, lapack_int *info ); void LAPACK_chbtrd( char* vect, char* uplo, lapack_int* n, lapack_int* kd, lapack_complex_float* ab, lapack_int* ldab, float* d, float* e, lapack_complex_float* q, lapack_int* ldq, lapack_complex_float* work, lapack_int *info ); void LAPACK_zhbtrd( char* vect, char* uplo, lapack_int* n, lapack_int* kd, lapack_complex_double* ab, lapack_int* ldab, double* d, double* e, lapack_complex_double* q, lapack_int* ldq, lapack_complex_double* work, lapack_int *info );
/* Symmetric/Hermitian tridiagonal eigensolvers (sterf/steqr/stemr/stedc/stegr/
 * pteqr/stebz/stein), condition estimation (disna), and reduction of the
 * generalized definite eigenproblem to standard form (sygst/hegst/spgst/hpgst/
 * sbgst/hbgst, pbstf). */
void LAPACK_ssterf( lapack_int* n, float* d, float* e,
lapack_int *info ); void LAPACK_dsterf( lapack_int* n, double* d, double* e, lapack_int *info ); void LAPACK_ssteqr( char* compz, lapack_int* n, float* d, float* e, float* z, lapack_int* ldz, float* work, lapack_int *info ); void LAPACK_dsteqr( char* compz, lapack_int* n, double* d, double* e, double* z, lapack_int* ldz, double* work, lapack_int *info ); void LAPACK_csteqr( char* compz, lapack_int* n, float* d, float* e, lapack_complex_float* z, lapack_int* ldz, float* work, lapack_int *info ); void LAPACK_zsteqr( char* compz, lapack_int* n, double* d, double* e, lapack_complex_double* z, lapack_int* ldz, double* work, lapack_int *info ); void LAPACK_sstemr( char* jobz, char* range, lapack_int* n, float* d, float* e, float* vl, float* vu, lapack_int* il, lapack_int* iu, lapack_int* m, float* w, float* z, lapack_int* ldz, lapack_int* nzc, lapack_int* isuppz, lapack_logical* tryrac, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dstemr( char* jobz, char* range, lapack_int* n, double* d, double* e, double* vl, double* vu, lapack_int* il, lapack_int* iu, lapack_int* m, double* w, double* z, lapack_int* ldz, lapack_int* nzc, lapack_int* isuppz, lapack_logical* tryrac, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_cstemr( char* jobz, char* range, lapack_int* n, float* d, float* e, float* vl, float* vu, lapack_int* il, lapack_int* iu, lapack_int* m, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_int* nzc, lapack_int* isuppz, lapack_logical* tryrac, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_zstemr( char* jobz, char* range, lapack_int* n, double* d, double* e, double* vl, double* vu, lapack_int* il, lapack_int* iu, lapack_int* m, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_int* nzc, lapack_int* isuppz, lapack_logical* tryrac, double* work, lapack_int* lwork, lapack_int*
iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_sstedc( char* compz, lapack_int* n, float* d, float* e, float* z, lapack_int* ldz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dstedc( char* compz, lapack_int* n, double* d, double* e, double* z, lapack_int* ldz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_cstedc( char* compz, lapack_int* n, float* d, float* e, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_zstedc( char* compz, lapack_int* n, double* d, double* e, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_sstegr( char* jobz, char* range, lapack_int* n, float* d, float* e, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, float* z, lapack_int* ldz, lapack_int* isuppz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dstegr( char* jobz, char* range, lapack_int* n, double* d, double* e, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, double* z, lapack_int* ldz, lapack_int* isuppz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_cstegr( char* jobz, char* range, lapack_int* n, float* d, float* e, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_int* isuppz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_zstegr( char* jobz, char* range, lapack_int* n, double* d, double* e, double* vl, double* vu,
lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_int* isuppz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_spteqr( char* compz, lapack_int* n, float* d, float* e, float* z, lapack_int* ldz, float* work, lapack_int *info ); void LAPACK_dpteqr( char* compz, lapack_int* n, double* d, double* e, double* z, lapack_int* ldz, double* work, lapack_int *info ); void LAPACK_cpteqr( char* compz, lapack_int* n, float* d, float* e, lapack_complex_float* z, lapack_int* ldz, float* work, lapack_int *info ); void LAPACK_zpteqr( char* compz, lapack_int* n, double* d, double* e, lapack_complex_double* z, lapack_int* ldz, double* work, lapack_int *info ); void LAPACK_sstebz( char* range, char* order, lapack_int* n, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, const float* d, const float* e, lapack_int* m, lapack_int* nsplit, float* w, lapack_int* iblock, lapack_int* isplit, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dstebz( char* range, char* order, lapack_int* n, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, const double* d, const double* e, lapack_int* m, lapack_int* nsplit, double* w, lapack_int* iblock, lapack_int* isplit, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_sstein( lapack_int* n, const float* d, const float* e, lapack_int* m, const float* w, const lapack_int* iblock, const lapack_int* isplit, float* z, lapack_int* ldz, float* work, lapack_int* iwork, lapack_int* ifailv, lapack_int *info ); void LAPACK_dstein( lapack_int* n, const double* d, const double* e, lapack_int* m, const double* w, const lapack_int* iblock, const lapack_int* isplit, double* z, lapack_int* ldz, double* work, lapack_int* iwork, lapack_int* ifailv, lapack_int *info ); void LAPACK_cstein( lapack_int* n, const float* d, const float* e, lapack_int* m, const float* w, const
lapack_int* iblock, const lapack_int* isplit, lapack_complex_float* z, lapack_int* ldz, float* work, lapack_int* iwork, lapack_int* ifailv, lapack_int *info ); void LAPACK_zstein( lapack_int* n, const double* d, const double* e, lapack_int* m, const double* w, const lapack_int* iblock, const lapack_int* isplit, lapack_complex_double* z, lapack_int* ldz, double* work, lapack_int* iwork, lapack_int* ifailv, lapack_int *info ); void LAPACK_sdisna( char* job, lapack_int* m, lapack_int* n, const float* d, float* sep, lapack_int *info ); void LAPACK_ddisna( char* job, lapack_int* m, lapack_int* n, const double* d, double* sep, lapack_int *info ); void LAPACK_ssygst( lapack_int* itype, char* uplo, lapack_int* n, float* a, lapack_int* lda, const float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dsygst( lapack_int* itype, char* uplo, lapack_int* n, double* a, lapack_int* lda, const double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_chegst( lapack_int* itype, char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zhegst( lapack_int* itype, char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_sspgst( lapack_int* itype, char* uplo, lapack_int* n, float* ap, const float* bp, lapack_int *info ); void LAPACK_dspgst( lapack_int* itype, char* uplo, lapack_int* n, double* ap, const double* bp, lapack_int *info ); void LAPACK_chpgst( lapack_int* itype, char* uplo, lapack_int* n, lapack_complex_float* ap, const lapack_complex_float* bp, lapack_int *info ); void LAPACK_zhpgst( lapack_int* itype, char* uplo, lapack_int* n, lapack_complex_double* ap, const lapack_complex_double* bp, lapack_int *info ); void LAPACK_ssbgst( char* vect, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, float* ab, lapack_int* ldab, const float* bb, lapack_int* ldbb, float* x, lapack_int* ldx,
float* work, lapack_int *info ); void LAPACK_dsbgst( char* vect, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, double* ab, lapack_int* ldab, const double* bb, lapack_int* ldbb, double* x, lapack_int* ldx, double* work, lapack_int *info ); void LAPACK_chbgst( char* vect, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, lapack_complex_float* ab, lapack_int* ldab, const lapack_complex_float* bb, lapack_int* ldbb, lapack_complex_float* x, lapack_int* ldx, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zhbgst( char* vect, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, lapack_complex_double* ab, lapack_int* ldab, const lapack_complex_double* bb, lapack_int* ldbb, lapack_complex_double* x, lapack_int* ldx, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_spbstf( char* uplo, lapack_int* n, lapack_int* kb, float* bb, lapack_int* ldbb, lapack_int *info ); void LAPACK_dpbstf( char* uplo, lapack_int* n, lapack_int* kb, double* bb, lapack_int* ldbb, lapack_int *info ); void LAPACK_cpbstf( char* uplo, lapack_int* n, lapack_int* kb, lapack_complex_float* bb, lapack_int* ldbb, lapack_int *info ); void LAPACK_zpbstf( char* uplo, lapack_int* n, lapack_int* kb, lapack_complex_double* bb, lapack_int* ldbb, lapack_int *info );
/* Hessenberg reduction and the nonsymmetric eigenproblem (gehrd, orghr/unghr,
 * ormhr/unmhr, gebal/gebak, hseqr, hsein, trevc, trsna, trexc, trsen, trsyl). */
void LAPACK_sgehrd( lapack_int* n, lapack_int* ilo, lapack_int* ihi, float* a, lapack_int* lda, float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgehrd( lapack_int* n, lapack_int* ilo, lapack_int* ihi, double* a, lapack_int* lda, double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgehrd( lapack_int* n, lapack_int* ilo, lapack_int* ihi, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zgehrd( lapack_int* n, lapack_int* ilo, lapack_int* ihi, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* tau,
lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sorghr( lapack_int* n, lapack_int* ilo, lapack_int* ihi, float* a, lapack_int* lda, const float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dorghr( lapack_int* n, lapack_int* ilo, lapack_int* ihi, double* a, lapack_int* lda, const double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sormhr( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const float* a, lapack_int* lda, const float* tau, float* c, lapack_int* ldc, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dormhr( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const double* a, lapack_int* lda, const double* tau, double* c, lapack_int* ldc, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cunghr( lapack_int* n, lapack_int* ilo, lapack_int* ihi, lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zunghr( lapack_int* n, lapack_int* ilo, lapack_int* ihi, lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cunmhr( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zunmhr( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sgebal( char* job, lapack_int* n, float* a, lapack_int* lda, lapack_int*
ilo, lapack_int* ihi, float* scale, lapack_int *info ); void LAPACK_dgebal( char* job, lapack_int* n, double* a, lapack_int* lda, lapack_int* ilo, lapack_int* ihi, double* scale, lapack_int *info ); void LAPACK_cgebal( char* job, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int* ilo, lapack_int* ihi, float* scale, lapack_int *info ); void LAPACK_zgebal( char* job, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* ilo, lapack_int* ihi, double* scale, lapack_int *info ); void LAPACK_sgebak( char* job, char* side, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const float* scale, lapack_int* m, float* v, lapack_int* ldv, lapack_int *info ); void LAPACK_dgebak( char* job, char* side, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const double* scale, lapack_int* m, double* v, lapack_int* ldv, lapack_int *info ); void LAPACK_cgebak( char* job, char* side, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const float* scale, lapack_int* m, lapack_complex_float* v, lapack_int* ldv, lapack_int *info ); void LAPACK_zgebak( char* job, char* side, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const double* scale, lapack_int* m, lapack_complex_double* v, lapack_int* ldv, lapack_int *info ); void LAPACK_shseqr( char* job, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, float* h, lapack_int* ldh, float* wr, float* wi, float* z, lapack_int* ldz, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dhseqr( char* job, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, double* h, lapack_int* ldh, double* wr, double* wi, double* z, lapack_int* ldz, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_chseqr( char* job, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, lapack_complex_float* h, lapack_int* ldh, lapack_complex_float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zhseqr( char* job, char*
compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, lapack_complex_double* h, lapack_int* ldh, lapack_complex_double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_shsein( char* job, char* eigsrc, char* initv, lapack_logical* select, lapack_int* n, const float* h, lapack_int* ldh, float* wr, const float* wi, float* vl, lapack_int* ldvl, float* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, float* work, lapack_int* ifaill, lapack_int* ifailr, lapack_int *info ); void LAPACK_dhsein( char* job, char* eigsrc, char* initv, lapack_logical* select, lapack_int* n, const double* h, lapack_int* ldh, double* wr, const double* wi, double* vl, lapack_int* ldvl, double* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, double* work, lapack_int* ifaill, lapack_int* ifailr, lapack_int *info ); void LAPACK_chsein( char* job, char* eigsrc, char* initv, const lapack_logical* select, lapack_int* n, const lapack_complex_float* h, lapack_int* ldh, lapack_complex_float* w, lapack_complex_float* vl, lapack_int* ldvl, lapack_complex_float* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, lapack_complex_float* work, float* rwork, lapack_int* ifaill, lapack_int* ifailr, lapack_int *info ); void LAPACK_zhsein( char* job, char* eigsrc, char* initv, const lapack_logical* select, lapack_int* n, const lapack_complex_double* h, lapack_int* ldh, lapack_complex_double* w, lapack_complex_double* vl, lapack_int* ldvl, lapack_complex_double* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, lapack_complex_double* work, double* rwork, lapack_int* ifaill, lapack_int* ifailr, lapack_int *info ); void LAPACK_strevc( char* side, char* howmny, lapack_logical* select, lapack_int* n, const float* t, lapack_int* ldt, float* vl, lapack_int* ldvl, float* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, float* work, lapack_int *info ); void LAPACK_dtrevc( char* side, char* howmny, lapack_logical* select,
lapack_int* n, const double* t, lapack_int* ldt, double* vl, lapack_int* ldvl, double* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, double* work, lapack_int *info ); void LAPACK_ctrevc( char* side, char* howmny, const lapack_logical* select, lapack_int* n, lapack_complex_float* t, lapack_int* ldt, lapack_complex_float* vl, lapack_int* ldvl, lapack_complex_float* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_ztrevc( char* side, char* howmny, const lapack_logical* select, lapack_int* n, lapack_complex_double* t, lapack_int* ldt, lapack_complex_double* vl, lapack_int* ldvl, lapack_complex_double* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_strsna( char* job, char* howmny, const lapack_logical* select, lapack_int* n, const float* t, lapack_int* ldt, const float* vl, lapack_int* ldvl, const float* vr, lapack_int* ldvr, float* s, float* sep, lapack_int* mm, lapack_int* m, float* work, lapack_int* ldwork, lapack_int* iwork, lapack_int *info ); void LAPACK_dtrsna( char* job, char* howmny, const lapack_logical* select, lapack_int* n, const double* t, lapack_int* ldt, const double* vl, lapack_int* ldvl, const double* vr, lapack_int* ldvr, double* s, double* sep, lapack_int* mm, lapack_int* m, double* work, lapack_int* ldwork, lapack_int* iwork, lapack_int *info ); void LAPACK_ctrsna( char* job, char* howmny, const lapack_logical* select, lapack_int* n, const lapack_complex_float* t, lapack_int* ldt, const lapack_complex_float* vl, lapack_int* ldvl, const lapack_complex_float* vr, lapack_int* ldvr, float* s, float* sep, lapack_int* mm, lapack_int* m, lapack_complex_float* work, lapack_int* ldwork, float* rwork, lapack_int *info ); void LAPACK_ztrsna( char* job, char* howmny, const lapack_logical* select, lapack_int* n, const lapack_complex_double* t, lapack_int* ldt, const lapack_complex_double* vl,
lapack_int* ldvl, const lapack_complex_double* vr, lapack_int* ldvr, double* s, double* sep, lapack_int* mm, lapack_int* m, lapack_complex_double* work, lapack_int* ldwork, double* rwork, lapack_int *info ); void LAPACK_strexc( char* compq, lapack_int* n, float* t, lapack_int* ldt, float* q, lapack_int* ldq, lapack_int* ifst, lapack_int* ilst, float* work, lapack_int *info ); void LAPACK_dtrexc( char* compq, lapack_int* n, double* t, lapack_int* ldt, double* q, lapack_int* ldq, lapack_int* ifst, lapack_int* ilst, double* work, lapack_int *info ); void LAPACK_ctrexc( char* compq, lapack_int* n, lapack_complex_float* t, lapack_int* ldt, lapack_complex_float* q, lapack_int* ldq, lapack_int* ifst, lapack_int* ilst, lapack_int *info ); void LAPACK_ztrexc( char* compq, lapack_int* n, lapack_complex_double* t, lapack_int* ldt, lapack_complex_double* q, lapack_int* ldq, lapack_int* ifst, lapack_int* ilst, lapack_int *info ); void LAPACK_strsen( char* job, char* compq, const lapack_logical* select, lapack_int* n, float* t, lapack_int* ldt, float* q, lapack_int* ldq, float* wr, float* wi, lapack_int* m, float* s, float* sep, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dtrsen( char* job, char* compq, const lapack_logical* select, lapack_int* n, double* t, lapack_int* ldt, double* q, lapack_int* ldq, double* wr, double* wi, lapack_int* m, double* s, double* sep, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_ctrsen( char* job, char* compq, const lapack_logical* select, lapack_int* n, lapack_complex_float* t, lapack_int* ldt, lapack_complex_float* q, lapack_int* ldq, lapack_complex_float* w, lapack_int* m, float* s, float* sep, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_ztrsen( char* job, char* compq, const lapack_logical* select, lapack_int* n, lapack_complex_double* t, lapack_int* ldt, lapack_complex_double* q, lapack_int*
ldq, lapack_complex_double* w, lapack_int* m, double* s, double* sep, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_strsyl( char* trana, char* tranb, lapack_int* isgn, lapack_int* m, lapack_int* n, const float* a, lapack_int* lda, const float* b, lapack_int* ldb, float* c, lapack_int* ldc, float* scale, lapack_int *info ); void LAPACK_dtrsyl( char* trana, char* tranb, lapack_int* isgn, lapack_int* m, lapack_int* n, const double* a, lapack_int* lda, const double* b, lapack_int* ldb, double* c, lapack_int* ldc, double* scale, lapack_int *info ); void LAPACK_ctrsyl( char* trana, char* tranb, lapack_int* isgn, lapack_int* m, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* c, lapack_int* ldc, float* scale, lapack_int *info ); void LAPACK_ztrsyl( char* trana, char* tranb, lapack_int* isgn, lapack_int* m, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* c, lapack_int* ldc, double* scale, lapack_int *info );
/* Generalized nonsymmetric eigenproblem (gghrd, ggbal/ggbak, hgeqz, tgevc,
 * tgexc, tgsen, tgsyl, tgsna) and generalized SVD preprocessing (ggsvp, tgsja). */
void LAPACK_sgghrd( char* compq, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* q, lapack_int* ldq, float* z, lapack_int* ldz, lapack_int *info ); void LAPACK_dgghrd( char* compq, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* q, lapack_int* ldq, double* z, lapack_int* ldz, lapack_int *info ); void LAPACK_cgghrd( char* compq, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* q, lapack_int* ldq, lapack_complex_float* z, lapack_int* ldz, lapack_int *info ); void LAPACK_zgghrd( char* compq, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, lapack_complex_double* a, lapack_int* lda,
lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* q, lapack_int* ldq, lapack_complex_double* z, lapack_int* ldz, lapack_int *info ); void LAPACK_sggbal( char* job, lapack_int* n, float* a, lapack_int* lda, float* b, lapack_int* ldb, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale, float* work, lapack_int *info ); void LAPACK_dggbal( char* job, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale, double* work, lapack_int *info ); void LAPACK_cggbal( char* job, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale, float* work, lapack_int *info ); void LAPACK_zggbal( char* job, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale, double* work, lapack_int *info ); void LAPACK_sggbak( char* job, char* side, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const float* lscale, const float* rscale, lapack_int* m, float* v, lapack_int* ldv, lapack_int *info ); void LAPACK_dggbak( char* job, char* side, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const double* lscale, const double* rscale, lapack_int* m, double* v, lapack_int* ldv, lapack_int *info ); void LAPACK_cggbak( char* job, char* side, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const float* lscale, const float* rscale, lapack_int* m, lapack_complex_float* v, lapack_int* ldv, lapack_int *info ); void LAPACK_zggbak( char* job, char* side, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const double* lscale, const double* rscale, lapack_int* m, lapack_complex_double* v, lapack_int* ldv, lapack_int *info ); void LAPACK_shgeqz( char* job, char* compq, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, float* h, lapack_int* ldh, float* t, lapack_int* ldt, float*
alphar, float* alphai, float* beta, float* q, lapack_int* ldq, float* z, lapack_int* ldz, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dhgeqz( char* job, char* compq, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, double* h, lapack_int* ldh, double* t, lapack_int* ldt, double* alphar, double* alphai, double* beta, double* q, lapack_int* ldq, double* z, lapack_int* ldz, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_chgeqz( char* job, char* compq, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, lapack_complex_float* h, lapack_int* ldh, lapack_complex_float* t, lapack_int* ldt, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* q, lapack_int* ldq, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zhgeqz( char* job, char* compq, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, lapack_complex_double* h, lapack_int* ldh, lapack_complex_double* t, lapack_int* ldt, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* q, lapack_int* ldq, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_stgevc( char* side, char* howmny, const lapack_logical* select, lapack_int* n, const float* s, lapack_int* lds, const float* p, lapack_int* ldp, float* vl, lapack_int* ldvl, float* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, float* work, lapack_int *info ); void LAPACK_dtgevc( char* side, char* howmny, const lapack_logical* select, lapack_int* n, const double* s, lapack_int* lds, const double* p, lapack_int* ldp, double* vl, lapack_int* ldvl, double* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, double* work, lapack_int *info ); void LAPACK_ctgevc( char* side, char* howmny, const lapack_logical* select, lapack_int* n, const lapack_complex_float* s, lapack_int* lds, const
lapack_complex_float* p, lapack_int* ldp, lapack_complex_float* vl, lapack_int* ldvl, lapack_complex_float* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_ztgevc( char* side, char* howmny, const lapack_logical* select, lapack_int* n, const lapack_complex_double* s, lapack_int* lds, const lapack_complex_double* p, lapack_int* ldp, lapack_complex_double* vl, lapack_int* ldvl, lapack_complex_double* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_stgexc( lapack_logical* wantq, lapack_logical* wantz, lapack_int* n, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* q, lapack_int* ldq, float* z, lapack_int* ldz, lapack_int* ifst, lapack_int* ilst, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dtgexc( lapack_logical* wantq, lapack_logical* wantz, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* q, lapack_int* ldq, double* z, lapack_int* ldz, lapack_int* ifst, lapack_int* ilst, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_ctgexc( lapack_logical* wantq, lapack_logical* wantz, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* q, lapack_int* ldq, lapack_complex_float* z, lapack_int* ldz, lapack_int* ifst, lapack_int* ilst, lapack_int *info ); void LAPACK_ztgexc( lapack_logical* wantq, lapack_logical* wantz, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* q, lapack_int* ldq, lapack_complex_double* z, lapack_int* ldz, lapack_int* ifst, lapack_int* ilst, lapack_int *info ); void LAPACK_stgsen( lapack_int* ijob, lapack_logical* wantq, lapack_logical* wantz, const lapack_logical* select, lapack_int* n, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* alphar, float* alphai, float* beta,
float* q, lapack_int* ldq, float* z, lapack_int* ldz, lapack_int* m, float* pl, float* pr, float* dif, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dtgsen( lapack_int* ijob, lapack_logical* wantq, lapack_logical* wantz, const lapack_logical* select, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* alphar, double* alphai, double* beta, double* q, lapack_int* ldq, double* z, lapack_int* ldz, lapack_int* m, double* pl, double* pr, double* dif, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_ctgsen( lapack_int* ijob, lapack_logical* wantq, lapack_logical* wantz, const lapack_logical* select, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* q, lapack_int* ldq, lapack_complex_float* z, lapack_int* ldz, lapack_int* m, float* pl, float* pr, float* dif, lapack_complex_float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_ztgsen( lapack_int* ijob, lapack_logical* wantq, lapack_logical* wantz, const lapack_logical* select, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* q, lapack_int* ldq, lapack_complex_double* z, lapack_int* ldz, lapack_int* m, double* pl, double* pr, double* dif, lapack_complex_double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_stgsyl( char* trans, lapack_int* ijob, lapack_int* m, lapack_int* n, const float* a, lapack_int* lda, const float* b, lapack_int* ldb, float* c, lapack_int* ldc, const float* d, lapack_int* ldd, const float* e, lapack_int* lde, float* f, lapack_int* ldf, float* scale, float* dif, float* work, lapack_int* lwork, lapack_int* iwork,
lapack_int *info ); void LAPACK_dtgsyl( char* trans, lapack_int* ijob, lapack_int* m, lapack_int* n, const double* a, lapack_int* lda, const double* b, lapack_int* ldb, double* c, lapack_int* ldc, const double* d, lapack_int* ldd, const double* e, lapack_int* lde, double* f, lapack_int* ldf, double* scale, double* dif, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_ctgsyl( char* trans, lapack_int* ijob, lapack_int* m, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* c, lapack_int* ldc, const lapack_complex_float* d, lapack_int* ldd, const lapack_complex_float* e, lapack_int* lde, lapack_complex_float* f, lapack_int* ldf, float* scale, float* dif, lapack_complex_float* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_ztgsyl( char* trans, lapack_int* ijob, lapack_int* m, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* c, lapack_int* ldc, const lapack_complex_double* d, lapack_int* ldd, const lapack_complex_double* e, lapack_int* lde, lapack_complex_double* f, lapack_int* ldf, double* scale, double* dif, lapack_complex_double* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_stgsna( char* job, char* howmny, const lapack_logical* select, lapack_int* n, const float* a, lapack_int* lda, const float* b, lapack_int* ldb, const float* vl, lapack_int* ldvl, const float* vr, lapack_int* ldvr, float* s, float* dif, lapack_int* mm, lapack_int* m, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_dtgsna( char* job, char* howmny, const lapack_logical* select, lapack_int* n, const double* a, lapack_int* lda, const double* b, lapack_int* ldb, const double* vl, lapack_int* ldvl, const double* vr, lapack_int* ldvr, double* s, double* dif, lapack_int* mm, lapack_int* m, double* work,
lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_ctgsna( char* job, char* howmny, const lapack_logical* select, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* b, lapack_int* ldb, const lapack_complex_float* vl, lapack_int* ldvl, const lapack_complex_float* vr, lapack_int* ldvr, float* s, float* dif, lapack_int* mm, lapack_int* m, lapack_complex_float* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_ztgsna( char* job, char* howmny, const lapack_logical* select, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* b, lapack_int* ldb, const lapack_complex_double* vl, lapack_int* ldvl, const lapack_complex_double* vr, lapack_int* ldvr, double* s, double* dif, lapack_int* mm, lapack_int* m, lapack_complex_double* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_sggsvp( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* p, lapack_int* n, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* tola, float* tolb, lapack_int* k, lapack_int* l, float* u, lapack_int* ldu, float* v, lapack_int* ldv, float* q, lapack_int* ldq, lapack_int* iwork, float* tau, float* work, lapack_int *info ); void LAPACK_dggsvp( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* p, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* tola, double* tolb, lapack_int* k, lapack_int* l, double* u, lapack_int* ldu, double* v, lapack_int* ldv, double* q, lapack_int* ldq, lapack_int* iwork, double* tau, double* work, lapack_int *info ); void LAPACK_cggsvp( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* p, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, float* tola, float* tolb, lapack_int* k, lapack_int* l, lapack_complex_float* u, lapack_int* ldu, lapack_complex_float* v, lapack_int* ldv, lapack_complex_float* q, lapack_int*
ldq, lapack_int* iwork, float* rwork, lapack_complex_float* tau, lapack_complex_float* work, lapack_int *info ); void LAPACK_zggsvp( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* p, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, double* tola, double* tolb, lapack_int* k, lapack_int* l, lapack_complex_double* u, lapack_int* ldu, lapack_complex_double* v, lapack_int* ldv, lapack_complex_double* q, lapack_int* ldq, lapack_int* iwork, double* rwork, lapack_complex_double* tau, lapack_complex_double* work, lapack_int *info ); void LAPACK_stgsja( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* p, lapack_int* n, lapack_int* k, lapack_int* l, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* tola, float* tolb, float* alpha, float* beta, float* u, lapack_int* ldu, float* v, lapack_int* ldv, float* q, lapack_int* ldq, float* work, lapack_int* ncycle, lapack_int *info ); void LAPACK_dtgsja( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* p, lapack_int* n, lapack_int* k, lapack_int* l, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* tola, double* tolb, double* alpha, double* beta, double* u, lapack_int* ldu, double* v, lapack_int* ldv, double* q, lapack_int* ldq, double* work, lapack_int* ncycle, lapack_int *info ); void LAPACK_ctgsja( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* p, lapack_int* n, lapack_int* k, lapack_int* l, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, float* tola, float* tolb, float* alpha, float* beta, lapack_complex_float* u, lapack_int* ldu, lapack_complex_float* v, lapack_int* ldv, lapack_complex_float* q, lapack_int* ldq, lapack_complex_float* work, lapack_int* ncycle, lapack_int *info ); void LAPACK_ztgsja( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* p, lapack_int* n, lapack_int* k, lapack_int* l, lapack_complex_double* a, lapack_int* lda,
lapack_complex_double* b, lapack_int* ldb, double* tola, double* tolb, double* alpha, double* beta, lapack_complex_double* u, lapack_int* ldu, lapack_complex_double* v, lapack_int* ldv, lapack_complex_double* q, lapack_int* ldq, lapack_complex_double* work, lapack_int* ncycle, lapack_int *info ); void LAPACK_sgels( char* trans, lapack_int* m, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgels( char* trans, lapack_int* m, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgels( char* trans, lapack_int* m, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zgels( char* trans, lapack_int* m, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sgelsy( lapack_int* m, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, float* b, lapack_int* ldb, lapack_int* jpvt, float* rcond, lapack_int* rank, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgelsy( lapack_int* m, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, double* b, lapack_int* ldb, lapack_int* jpvt, double* rcond, lapack_int* rank, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgelsy( lapack_int* m, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_int* jpvt, float* rcond, lapack_int* rank, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zgelsy( lapack_int* m, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, 
lapack_complex_double* b, lapack_int* ldb, lapack_int* jpvt, double* rcond, lapack_int* rank, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_sgelss( lapack_int* m, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* s, float* rcond, lapack_int* rank, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgelss( lapack_int* m, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* s, double* rcond, lapack_int* rank, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgelss( lapack_int* m, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, float* s, float* rcond, lapack_int* rank, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zgelss( lapack_int* m, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, double* s, double* rcond, lapack_int* rank, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_sgelsd( lapack_int* m, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* s, float* rcond, lapack_int* rank, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_dgelsd( lapack_int* m, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* s, double* rcond, lapack_int* rank, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_cgelsd( lapack_int* m, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, float* s, float* rcond, lapack_int* rank, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* iwork, lapack_int *info ); void LAPACK_zgelsd( lapack_int* m, lapack_int* n, 
lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, double* s, double* rcond, lapack_int* rank, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* iwork, lapack_int *info ); void LAPACK_sgglse( lapack_int* m, lapack_int* n, lapack_int* p, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* c, float* d, float* x, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgglse( lapack_int* m, lapack_int* n, lapack_int* p, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* c, double* d, double* x, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgglse( lapack_int* m, lapack_int* n, lapack_int* p, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* c, lapack_complex_float* d, lapack_complex_float* x, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zgglse( lapack_int* m, lapack_int* n, lapack_int* p, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* c, lapack_complex_double* d, lapack_complex_double* x, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sggglm( lapack_int* n, lapack_int* m, lapack_int* p, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* d, float* x, float* y, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dggglm( lapack_int* n, lapack_int* m, lapack_int* p, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* d, double* x, double* y, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cggglm( lapack_int* n, lapack_int* m, lapack_int* p, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* d, lapack_complex_float* x, lapack_complex_float* y, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zggglm( lapack_int* n, lapack_int* m, 
lapack_int* p, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* d, lapack_complex_double* x, lapack_complex_double* y, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_ssyev( char* jobz, char* uplo, lapack_int* n, float* a, lapack_int* lda, float* w, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dsyev( char* jobz, char* uplo, lapack_int* n, double* a, lapack_int* lda, double* w, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cheev( char* jobz, char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, float* w, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zheev( char* jobz, char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, double* w, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_ssyevd( char* jobz, char* uplo, lapack_int* n, float* a, lapack_int* lda, float* w, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dsyevd( char* jobz, char* uplo, lapack_int* n, double* a, lapack_int* lda, double* w, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_cheevd( char* jobz, char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, float* w, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_zheevd( char* jobz, char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, double* w, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_ssyevx( char* jobz, char* range, char* uplo, lapack_int* n, float* a, lapack_int* lda, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, 
float* w, float* z, lapack_int* ldz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_dsyevx( char* jobz, char* range, char* uplo, lapack_int* n, double* a, lapack_int* lda, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, double* z, lapack_int* ldz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_cheevx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_zheevx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_ssyevr( char* jobz, char* range, char* uplo, lapack_int* n, float* a, lapack_int* lda, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, float* z, lapack_int* ldz, lapack_int* isuppz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dsyevr( char* jobz, char* range, char* uplo, lapack_int* n, double* a, lapack_int* lda, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, double* z, lapack_int* ldz, lapack_int* isuppz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_cheevr( char* jobz, char* range, char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, float* vl, float* vu, 
lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_int* isuppz, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_zheevr( char* jobz, char* range, char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_int* isuppz, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_sspev( char* jobz, char* uplo, lapack_int* n, float* ap, float* w, float* z, lapack_int* ldz, float* work, lapack_int *info ); void LAPACK_dspev( char* jobz, char* uplo, lapack_int* n, double* ap, double* w, double* z, lapack_int* ldz, double* work, lapack_int *info ); void LAPACK_chpev( char* jobz, char* uplo, lapack_int* n, lapack_complex_float* ap, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zhpev( char* jobz, char* uplo, lapack_int* n, lapack_complex_double* ap, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sspevd( char* jobz, char* uplo, lapack_int* n, float* ap, float* w, float* z, lapack_int* ldz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dspevd( char* jobz, char* uplo, lapack_int* n, double* ap, double* w, double* z, lapack_int* ldz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_chpevd( char* jobz, char* uplo, lapack_int* n, lapack_complex_float* ap, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* lrwork, 
lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_zhpevd( char* jobz, char* uplo, lapack_int* n, lapack_complex_double* ap, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_sspevx( char* jobz, char* range, char* uplo, lapack_int* n, float* ap, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, float* z, lapack_int* ldz, float* work, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_dspevx( char* jobz, char* range, char* uplo, lapack_int* n, double* ap, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, double* z, lapack_int* ldz, double* work, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_chpevx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_complex_float* ap, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, float* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_zhpevx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_complex_double* ap, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, double* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_ssbev( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, float* ab, lapack_int* ldab, float* w, float* z, lapack_int* ldz, float* work, lapack_int *info ); void LAPACK_dsbev( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, double* ab, lapack_int* ldab, double* w, double* z, lapack_int* ldz, double* work, lapack_int *info ); void LAPACK_chbev( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, 
lapack_complex_float* ab, lapack_int* ldab, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zhbev( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, lapack_complex_double* ab, lapack_int* ldab, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_ssbevd( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, float* ab, lapack_int* ldab, float* w, float* z, lapack_int* ldz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dsbevd( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, double* ab, lapack_int* ldab, double* w, double* z, lapack_int* ldz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_chbevd( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, lapack_complex_float* ab, lapack_int* ldab, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_zhbevd( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, lapack_complex_double* ab, lapack_int* ldab, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_ssbevx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_int* kd, float* ab, lapack_int* ldab, float* q, lapack_int* ldq, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, float* z, lapack_int* ldz, float* work, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_dsbevx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_int* kd, double* ab, lapack_int* ldab, double* q, lapack_int* ldq, double* vl, double* vu, 
lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, double* z, lapack_int* ldz, double* work, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_chbevx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_int* kd, lapack_complex_float* ab, lapack_int* ldab, lapack_complex_float* q, lapack_int* ldq, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, float* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_zhbevx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_int* kd, lapack_complex_double* ab, lapack_int* ldab, lapack_complex_double* q, lapack_int* ldq, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, double* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_sstev( char* jobz, lapack_int* n, float* d, float* e, float* z, lapack_int* ldz, float* work, lapack_int *info ); void LAPACK_dstev( char* jobz, lapack_int* n, double* d, double* e, double* z, lapack_int* ldz, double* work, lapack_int *info ); void LAPACK_sstevd( char* jobz, lapack_int* n, float* d, float* e, float* z, lapack_int* ldz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dstevd( char* jobz, lapack_int* n, double* d, double* e, double* z, lapack_int* ldz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_sstevx( char* jobz, char* range, lapack_int* n, float* d, float* e, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, float* z, lapack_int* ldz, float* work, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_dstevx( char* jobz, char* range, lapack_int* n, double* d, double* e, double* vl, 
double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, double* z, lapack_int* ldz, double* work, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_sstevr( char* jobz, char* range, lapack_int* n, float* d, float* e, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, float* z, lapack_int* ldz, lapack_int* isuppz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dstevr( char* jobz, char* range, lapack_int* n, double* d, double* e, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, double* z, lapack_int* ldz, lapack_int* isuppz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_sgees( char* jobvs, char* sort, LAPACK_S_SELECT2 select, lapack_int* n, float* a, lapack_int* lda, lapack_int* sdim, float* wr, float* wi, float* vs, lapack_int* ldvs, float* work, lapack_int* lwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_dgees( char* jobvs, char* sort, LAPACK_D_SELECT2 select, lapack_int* n, double* a, lapack_int* lda, lapack_int* sdim, double* wr, double* wi, double* vs, lapack_int* ldvs, double* work, lapack_int* lwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_cgees( char* jobvs, char* sort, LAPACK_C_SELECT1 select, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int* sdim, lapack_complex_float* w, lapack_complex_float* vs, lapack_int* ldvs, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_zgees( char* jobvs, char* sort, LAPACK_Z_SELECT1 select, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* sdim, lapack_complex_double* w, lapack_complex_double* vs, lapack_int* ldvs, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_sgeesx( char* 
jobvs, char* sort, LAPACK_S_SELECT2 select, char* sense, lapack_int* n, float* a, lapack_int* lda, lapack_int* sdim, float* wr, float* wi, float* vs, lapack_int* ldvs, float* rconde, float* rcondv, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_dgeesx( char* jobvs, char* sort, LAPACK_D_SELECT2 select, char* sense, lapack_int* n, double* a, lapack_int* lda, lapack_int* sdim, double* wr, double* wi, double* vs, lapack_int* ldvs, double* rconde, double* rcondv, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_cgeesx( char* jobvs, char* sort, LAPACK_C_SELECT1 select, char* sense, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int* sdim, lapack_complex_float* w, lapack_complex_float* vs, lapack_int* ldvs, float* rconde, float* rcondv, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_zgeesx( char* jobvs, char* sort, LAPACK_Z_SELECT1 select, char* sense, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* sdim, lapack_complex_double* w, lapack_complex_double* vs, lapack_int* ldvs, double* rconde, double* rcondv, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_sgeev( char* jobvl, char* jobvr, lapack_int* n, float* a, lapack_int* lda, float* wr, float* wi, float* vl, lapack_int* ldvl, float* vr, lapack_int* ldvr, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgeev( char* jobvl, char* jobvr, lapack_int* n, double* a, lapack_int* lda, double* wr, double* wi, double* vl, lapack_int* ldvl, double* vr, lapack_int* ldvr, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgeev( char* jobvl, char* jobvr, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* w, lapack_complex_float* vl, lapack_int* 
ldvl, lapack_complex_float* vr, lapack_int* ldvr, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zgeev( char* jobvl, char* jobvr, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* w, lapack_complex_double* vl, lapack_int* ldvl, lapack_complex_double* vr, lapack_int* ldvr, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_sgeevx( char* balanc, char* jobvl, char* jobvr, char* sense, lapack_int* n, float* a, lapack_int* lda, float* wr, float* wi, float* vl, lapack_int* ldvl, float* vr, lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi, float* scale, float* abnrm, float* rconde, float* rcondv, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_dgeevx( char* balanc, char* jobvl, char* jobvr, char* sense, lapack_int* n, double* a, lapack_int* lda, double* wr, double* wi, double* vl, lapack_int* ldvl, double* vr, lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi, double* scale, double* abnrm, double* rconde, double* rcondv, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_cgeevx( char* balanc, char* jobvl, char* jobvr, char* sense, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* w, lapack_complex_float* vl, lapack_int* ldvl, lapack_complex_float* vr, lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi, float* scale, float* abnrm, float* rconde, float* rcondv, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zgeevx( char* balanc, char* jobvl, char* jobvr, char* sense, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* w, lapack_complex_double* vl, lapack_int* ldvl, lapack_complex_double* vr, lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi, double* scale, double* abnrm, double* rconde, double* rcondv, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info 
); void LAPACK_sgesvd( char* jobu, char* jobvt, lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* s, float* u, lapack_int* ldu, float* vt, lapack_int* ldvt, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgesvd( char* jobu, char* jobvt, lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* s, double* u, lapack_int* ldu, double* vt, lapack_int* ldvt, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgesvd( char* jobu, char* jobvt, lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, float* s, lapack_complex_float* u, lapack_int* ldu, lapack_complex_float* vt, lapack_int* ldvt, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zgesvd( char* jobu, char* jobvt, lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, double* s, lapack_complex_double* u, lapack_int* ldu, lapack_complex_double* vt, lapack_int* ldvt, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_sgesdd( char* jobz, lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* s, float* u, lapack_int* ldu, float* vt, lapack_int* ldvt, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_dgesdd( char* jobz, lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* s, double* u, lapack_int* ldu, double* vt, lapack_int* ldvt, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_cgesdd( char* jobz, lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, float* s, lapack_complex_float* u, lapack_int* ldu, lapack_complex_float* vt, lapack_int* ldvt, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* iwork, lapack_int *info ); void LAPACK_zgesdd( char* jobz, lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, double* s, lapack_complex_double* u, lapack_int* ldu, lapack_complex_double* vt, lapack_int* 
ldvt, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* iwork, lapack_int *info ); void LAPACK_dgejsv( char* joba, char* jobu, char* jobv, char* jobr, char* jobt, char* jobp, lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* sva, double* u, lapack_int* ldu, double* v, lapack_int* ldv, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_sgejsv( char* joba, char* jobu, char* jobv, char* jobr, char* jobt, char* jobp, lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* sva, float* u, lapack_int* ldu, float* v, lapack_int* ldv, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_dgesvj( char* joba, char* jobu, char* jobv, lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* sva, lapack_int* mv, double* v, lapack_int* ldv, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sgesvj( char* joba, char* jobu, char* jobv, lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* sva, lapack_int* mv, float* v, lapack_int* ldv, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sggsvd( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* n, lapack_int* p, lapack_int* k, lapack_int* l, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* alpha, float* beta, float* u, lapack_int* ldu, float* v, lapack_int* ldv, float* q, lapack_int* ldq, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dggsvd( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* n, lapack_int* p, lapack_int* k, lapack_int* l, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* alpha, double* beta, double* u, lapack_int* ldu, double* v, lapack_int* ldv, double* q, lapack_int* ldq, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cggsvd( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* n, lapack_int* p, lapack_int* k, lapack_int* l, lapack_complex_float* a, lapack_int* lda, 
lapack_complex_float* b, lapack_int* ldb, float* alpha, float* beta, lapack_complex_float* u, lapack_int* ldu, lapack_complex_float* v, lapack_int* ldv, lapack_complex_float* q, lapack_int* ldq, lapack_complex_float* work, float* rwork, lapack_int* iwork, lapack_int *info ); void LAPACK_zggsvd( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* n, lapack_int* p, lapack_int* k, lapack_int* l, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, double* alpha, double* beta, lapack_complex_double* u, lapack_int* ldu, lapack_complex_double* v, lapack_int* ldv, lapack_complex_double* q, lapack_int* ldq, lapack_complex_double* work, double* rwork, lapack_int* iwork, lapack_int *info ); void LAPACK_ssygv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* w, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dsygv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* w, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_chegv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, float* w, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zhegv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, double* w, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_ssygvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* w, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dsygvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, 
double* w, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_chegvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, float* w, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_zhegvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, double* w, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_ssygvx( lapack_int* itype, char* jobz, char* range, char* uplo, lapack_int* n, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, float* z, lapack_int* ldz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_dsygvx( lapack_int* itype, char* jobz, char* range, char* uplo, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, double* z, lapack_int* ldz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_chegvx( lapack_int* itype, char* jobz, char* range, char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_zhegvx( lapack_int* itype, char* jobz, char* range, char* uplo, lapack_int* n, lapack_complex_double* a, 
lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_sspgv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, float* ap, float* bp, float* w, float* z, lapack_int* ldz, float* work, lapack_int *info ); void LAPACK_dspgv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, double* ap, double* bp, double* w, double* z, lapack_int* ldz, double* work, lapack_int *info ); void LAPACK_chpgv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, lapack_complex_float* ap, lapack_complex_float* bp, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zhpgv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, lapack_complex_double* ap, lapack_complex_double* bp, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sspgvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, float* ap, float* bp, float* w, float* z, lapack_int* ldz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dspgvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, double* ap, double* bp, double* w, double* z, lapack_int* ldz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_chpgvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, lapack_complex_float* ap, lapack_complex_float* bp, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_zhpgvd( lapack_int* itype, char* jobz, char* 
uplo, lapack_int* n, lapack_complex_double* ap, lapack_complex_double* bp, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_sspgvx( lapack_int* itype, char* jobz, char* range, char* uplo, lapack_int* n, float* ap, float* bp, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, float* z, lapack_int* ldz, float* work, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_dspgvx( lapack_int* itype, char* jobz, char* range, char* uplo, lapack_int* n, double* ap, double* bp, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, double* z, lapack_int* ldz, double* work, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_chpgvx( lapack_int* itype, char* jobz, char* range, char* uplo, lapack_int* n, lapack_complex_float* ap, lapack_complex_float* bp, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, float* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_zhpgvx( lapack_int* itype, char* jobz, char* range, char* uplo, lapack_int* n, lapack_complex_double* ap, lapack_complex_double* bp, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, double* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_ssbgv( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, float* ab, lapack_int* ldab, float* bb, lapack_int* ldbb, float* w, float* z, lapack_int* ldz, float* work, lapack_int *info ); void LAPACK_dsbgv( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, double* ab, lapack_int* ldab, 
double* bb, lapack_int* ldbb, double* w, double* z, lapack_int* ldz, double* work, lapack_int *info ); void LAPACK_chbgv( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, lapack_complex_float* ab, lapack_int* ldab, lapack_complex_float* bb, lapack_int* ldbb, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zhbgv( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, lapack_complex_double* ab, lapack_int* ldab, lapack_complex_double* bb, lapack_int* ldbb, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_ssbgvd( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, float* ab, lapack_int* ldab, float* bb, lapack_int* ldbb, float* w, float* z, lapack_int* ldz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dsbgvd( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, double* ab, lapack_int* ldab, double* bb, lapack_int* ldbb, double* w, double* z, lapack_int* ldz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_chbgvd( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, lapack_complex_float* ab, lapack_int* ldab, lapack_complex_float* bb, lapack_int* ldbb, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_zhbgvd( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, lapack_complex_double* ab, lapack_int* ldab, lapack_complex_double* bb, lapack_int* ldbb, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); 
void LAPACK_ssbgvx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, float* ab, lapack_int* ldab, float* bb, lapack_int* ldbb, float* q, lapack_int* ldq, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, float* z, lapack_int* ldz, float* work, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_dsbgvx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, double* ab, lapack_int* ldab, double* bb, lapack_int* ldbb, double* q, lapack_int* ldq, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, double* z, lapack_int* ldz, double* work, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_chbgvx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, lapack_complex_float* ab, lapack_int* ldab, lapack_complex_float* bb, lapack_int* ldbb, lapack_complex_float* q, lapack_int* ldq, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, float* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_zhbgvx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, lapack_complex_double* ab, lapack_int* ldab, lapack_complex_double* bb, lapack_int* ldbb, lapack_complex_double* q, lapack_int* ldq, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, double* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_sgges( char* jobvsl, char* jobvsr, char* sort, LAPACK_S_SELECT3 selctg, lapack_int* n, float* a, lapack_int* lda, float* b, lapack_int* ldb, lapack_int* sdim, float* alphar, float* alphai, float* beta, float* vsl, lapack_int* ldvsl, float* vsr, lapack_int* ldvsr, 
float* work, lapack_int* lwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_dgges( char* jobvsl, char* jobvsr, char* sort, LAPACK_D_SELECT3 selctg, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, lapack_int* sdim, double* alphar, double* alphai, double* beta, double* vsl, lapack_int* ldvsl, double* vsr, lapack_int* ldvsr, double* work, lapack_int* lwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_cgges( char* jobvsl, char* jobvsr, char* sort, LAPACK_C_SELECT2 selctg, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_int* sdim, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vsl, lapack_int* ldvsl, lapack_complex_float* vsr, lapack_int* ldvsr, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_zgges( char* jobvsl, char* jobvsr, char* sort, LAPACK_Z_SELECT2 selctg, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_int* sdim, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vsl, lapack_int* ldvsl, lapack_complex_double* vsr, lapack_int* ldvsr, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_sggesx( char* jobvsl, char* jobvsr, char* sort, LAPACK_S_SELECT3 selctg, char* sense, lapack_int* n, float* a, lapack_int* lda, float* b, lapack_int* ldb, lapack_int* sdim, float* alphar, float* alphai, float* beta, float* vsl, lapack_int* ldvsl, float* vsr, lapack_int* ldvsr, float* rconde, float* rcondv, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_dggesx( char* jobvsl, char* jobvsr, char* sort, LAPACK_D_SELECT3 selctg, char* sense, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, lapack_int* sdim, double* alphar, double* 
alphai, double* beta, double* vsl, lapack_int* ldvsl, double* vsr, lapack_int* ldvsr, double* rconde, double* rcondv, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_cggesx( char* jobvsl, char* jobvsr, char* sort, LAPACK_C_SELECT2 selctg, char* sense, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_int* sdim, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vsl, lapack_int* ldvsl, lapack_complex_float* vsr, lapack_int* ldvsr, float* rconde, float* rcondv, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* iwork, lapack_int* liwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_zggesx( char* jobvsl, char* jobvsr, char* sort, LAPACK_Z_SELECT2 selctg, char* sense, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_int* sdim, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vsl, lapack_int* ldvsl, lapack_complex_double* vsr, lapack_int* ldvsr, double* rconde, double* rcondv, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* iwork, lapack_int* liwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_sggev( char* jobvl, char* jobvr, lapack_int* n, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* alphar, float* alphai, float* beta, float* vl, lapack_int* ldvl, float* vr, lapack_int* ldvr, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dggev( char* jobvl, char* jobvr, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* alphar, double* alphai, double* beta, double* vl, lapack_int* ldvl, double* vr, lapack_int* ldvr, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cggev( char* jobvl, char* jobvr, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* 
ldb, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vl, lapack_int* ldvl, lapack_complex_float* vr, lapack_int* ldvr, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zggev( char* jobvl, char* jobvr, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vl, lapack_int* ldvl, lapack_complex_double* vr, lapack_int* ldvr, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_sggevx( char* balanc, char* jobvl, char* jobvr, char* sense, lapack_int* n, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* alphar, float* alphai, float* beta, float* vl, lapack_int* ldvl, float* vr, lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale, float* abnrm, float* bbnrm, float* rconde, float* rcondv, float* work, lapack_int* lwork, lapack_int* iwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_dggevx( char* balanc, char* jobvl, char* jobvr, char* sense, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* alphar, double* alphai, double* beta, double* vl, lapack_int* ldvl, double* vr, lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale, double* abnrm, double* bbnrm, double* rconde, double* rcondv, double* work, lapack_int* lwork, lapack_int* iwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_cggevx( char* balanc, char* jobvl, char* jobvr, char* sense, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vl, lapack_int* ldvl, lapack_complex_float* vr, lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale, float* abnrm, float* bbnrm, float* rconde, float* rcondv, 
lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* iwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_zggevx( char* balanc, char* jobvl, char* jobvr, char* sense, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vl, lapack_int* ldvl, lapack_complex_double* vr, lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale, double* abnrm, double* bbnrm, double* rconde, double* rcondv, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* iwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_dsfrk( char* transr, char* uplo, char* trans, lapack_int* n, lapack_int* k, double* alpha, const double* a, lapack_int* lda, double* beta, double* c ); void LAPACK_ssfrk( char* transr, char* uplo, char* trans, lapack_int* n, lapack_int* k, float* alpha, const float* a, lapack_int* lda, float* beta, float* c ); void LAPACK_zhfrk( char* transr, char* uplo, char* trans, lapack_int* n, lapack_int* k, double* alpha, const lapack_complex_double* a, lapack_int* lda, double* beta, lapack_complex_double* c ); void LAPACK_chfrk( char* transr, char* uplo, char* trans, lapack_int* n, lapack_int* k, float* alpha, const lapack_complex_float* a, lapack_int* lda, float* beta, lapack_complex_float* c ); void LAPACK_dtfsm( char* transr, char* side, char* uplo, char* trans, char* diag, lapack_int* m, lapack_int* n, double* alpha, const double* a, double* b, lapack_int* ldb ); void LAPACK_stfsm( char* transr, char* side, char* uplo, char* trans, char* diag, lapack_int* m, lapack_int* n, float* alpha, const float* a, float* b, lapack_int* ldb ); void LAPACK_ztfsm( char* transr, char* side, char* uplo, char* trans, char* diag, lapack_int* m, lapack_int* n, lapack_complex_double* alpha, const lapack_complex_double* a, lapack_complex_double* b, lapack_int* ldb ); void LAPACK_ctfsm( char* 
transr, char* side, char* uplo, char* trans, char* diag, lapack_int* m, lapack_int* n, lapack_complex_float* alpha, const lapack_complex_float* a, lapack_complex_float* b, lapack_int* ldb ); void LAPACK_dtfttp( char* transr, char* uplo, lapack_int* n, const double* arf, double* ap, lapack_int *info ); void LAPACK_stfttp( char* transr, char* uplo, lapack_int* n, const float* arf, float* ap, lapack_int *info ); void LAPACK_ztfttp( char* transr, char* uplo, lapack_int* n, const lapack_complex_double* arf, lapack_complex_double* ap, lapack_int *info ); void LAPACK_ctfttp( char* transr, char* uplo, lapack_int* n, const lapack_complex_float* arf, lapack_complex_float* ap, lapack_int *info ); void LAPACK_dtfttr( char* transr, char* uplo, lapack_int* n, const double* arf, double* a, lapack_int* lda, lapack_int *info ); void LAPACK_stfttr( char* transr, char* uplo, lapack_int* n, const float* arf, float* a, lapack_int* lda, lapack_int *info ); void LAPACK_ztfttr( char* transr, char* uplo, lapack_int* n, const lapack_complex_double* arf, lapack_complex_double* a, lapack_int* lda, lapack_int *info ); void LAPACK_ctfttr( char* transr, char* uplo, lapack_int* n, const lapack_complex_float* arf, lapack_complex_float* a, lapack_int* lda, lapack_int *info ); void LAPACK_dtpttf( char* transr, char* uplo, lapack_int* n, const double* ap, double* arf, lapack_int *info ); void LAPACK_stpttf( char* transr, char* uplo, lapack_int* n, const float* ap, float* arf, lapack_int *info ); void LAPACK_ztpttf( char* transr, char* uplo, lapack_int* n, const lapack_complex_double* ap, lapack_complex_double* arf, lapack_int *info ); void LAPACK_ctpttf( char* transr, char* uplo, lapack_int* n, const lapack_complex_float* ap, lapack_complex_float* arf, lapack_int *info ); void LAPACK_dtpttr( char* uplo, lapack_int* n, const double* ap, double* a, lapack_int* lda, lapack_int *info ); void LAPACK_stpttr( char* uplo, lapack_int* n, const float* ap, float* a, lapack_int* lda, lapack_int *info ); void 
LAPACK_ztpttr( char* uplo, lapack_int* n, const lapack_complex_double* ap, lapack_complex_double* a, lapack_int* lda, lapack_int *info ); void LAPACK_ctpttr( char* uplo, lapack_int* n, const lapack_complex_float* ap, lapack_complex_float* a, lapack_int* lda, lapack_int *info ); void LAPACK_dtrttf( char* transr, char* uplo, lapack_int* n, const double* a, lapack_int* lda, double* arf, lapack_int *info ); void LAPACK_strttf( char* transr, char* uplo, lapack_int* n, const float* a, lapack_int* lda, float* arf, lapack_int *info ); void LAPACK_ztrttf( char* transr, char* uplo, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, lapack_complex_double* arf, lapack_int *info ); void LAPACK_ctrttf( char* transr, char* uplo, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, lapack_complex_float* arf, lapack_int *info ); void LAPACK_dtrttp( char* uplo, lapack_int* n, const double* a, lapack_int* lda, double* ap, lapack_int *info ); void LAPACK_strttp( char* uplo, lapack_int* n, const float* a, lapack_int* lda, float* ap, lapack_int *info ); void LAPACK_ztrttp( char* uplo, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, lapack_complex_double* ap, lapack_int *info ); void LAPACK_ctrttp( char* uplo, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, lapack_complex_float* ap, lapack_int *info ); void LAPACK_sgeqrfp( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgeqrfp( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgeqrfp( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zgeqrfp( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, 
lapack_int *info ); void LAPACK_clacgv( lapack_int* n, lapack_complex_float* x, lapack_int* incx ); void LAPACK_zlacgv( lapack_int* n, lapack_complex_double* x, lapack_int* incx ); void LAPACK_slarnv( lapack_int* idist, lapack_int* iseed, lapack_int* n, float* x ); void LAPACK_dlarnv( lapack_int* idist, lapack_int* iseed, lapack_int* n, double* x ); void LAPACK_clarnv( lapack_int* idist, lapack_int* iseed, lapack_int* n, lapack_complex_float* x ); void LAPACK_zlarnv( lapack_int* idist, lapack_int* iseed, lapack_int* n, lapack_complex_double* x ); void LAPACK_sgeqr2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* tau, float* work, lapack_int *info ); void LAPACK_dgeqr2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* tau, double* work, lapack_int *info ); void LAPACK_cgeqr2( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int *info ); void LAPACK_zgeqr2( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int *info ); void LAPACK_slacn2( lapack_int* n, float* v, float* x, lapack_int* isgn, float* est, lapack_int* kase, lapack_int* isave ); void LAPACK_dlacn2( lapack_int* n, double* v, double* x, lapack_int* isgn, double* est, lapack_int* kase, lapack_int* isave ); void LAPACK_clacn2( lapack_int* n, lapack_complex_float* v, lapack_complex_float* x, float* est, lapack_int* kase, lapack_int* isave ); void LAPACK_zlacn2( lapack_int* n, lapack_complex_double* v, lapack_complex_double* x, double* est, lapack_int* kase, lapack_int* isave ); void LAPACK_slacpy( char* uplo, lapack_int* m, lapack_int* n, const float* a, lapack_int* lda, float* b, lapack_int* ldb ); void LAPACK_dlacpy( char* uplo, lapack_int* m, lapack_int* n, const double* a, lapack_int* lda, double* b, lapack_int* ldb ); void LAPACK_clacpy( char* uplo, lapack_int* m, lapack_int* n, const 
lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb ); void LAPACK_zlacpy( char* uplo, lapack_int* m, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb ); void LAPACK_clacp2( char* uplo, lapack_int* m, lapack_int* n, const float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb ); void LAPACK_zlacp2( char* uplo, lapack_int* m, lapack_int* n, const double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb ); void LAPACK_sgetf2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, lapack_int* ipiv, lapack_int *info ); void LAPACK_dgetf2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, lapack_int* ipiv, lapack_int *info ); void LAPACK_cgetf2( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int* ipiv, lapack_int *info ); void LAPACK_zgetf2( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv, lapack_int *info ); void LAPACK_slaswp( lapack_int* n, float* a, lapack_int* lda, lapack_int* k1, lapack_int* k2, const lapack_int* ipiv, lapack_int* incx ); void LAPACK_dlaswp( lapack_int* n, double* a, lapack_int* lda, lapack_int* k1, lapack_int* k2, const lapack_int* ipiv, lapack_int* incx ); void LAPACK_claswp( lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int* k1, lapack_int* k2, const lapack_int* ipiv, lapack_int* incx ); void LAPACK_zlaswp( lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* k1, lapack_int* k2, const lapack_int* ipiv, lapack_int* incx ); float LAPACK_slange( char* norm, lapack_int* m, lapack_int* n, const float* a, lapack_int* lda, float* work ); double LAPACK_dlange( char* norm, lapack_int* m, lapack_int* n, const double* a, lapack_int* lda, double* work ); float LAPACK_clange( char* norm, lapack_int* m, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* work ); double LAPACK_zlange( char* norm, lapack_int* m, 
lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* work ); float LAPACK_clanhe( char* norm, char* uplo, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* work ); double LAPACK_zlanhe( char* norm, char* uplo, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* work ); float LAPACK_slansy( char* norm, char* uplo, lapack_int* n, const float* a, lapack_int* lda, float* work ); double LAPACK_dlansy( char* norm, char* uplo, lapack_int* n, const double* a, lapack_int* lda, double* work ); float LAPACK_clansy( char* norm, char* uplo, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* work ); double LAPACK_zlansy( char* norm, char* uplo, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* work ); float LAPACK_slantr( char* norm, char* uplo, char* diag, lapack_int* m, lapack_int* n, const float* a, lapack_int* lda, float* work ); double LAPACK_dlantr( char* norm, char* uplo, char* diag, lapack_int* m, lapack_int* n, const double* a, lapack_int* lda, double* work ); float LAPACK_clantr( char* norm, char* uplo, char* diag, lapack_int* m, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* work ); double LAPACK_zlantr( char* norm, char* uplo, char* diag, lapack_int* m, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* work ); float LAPACK_slamch( char* cmach ); double LAPACK_dlamch( char* cmach ); void LAPACK_sgelq2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* tau, float* work, lapack_int *info ); void LAPACK_dgelq2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* tau, double* work, lapack_int *info ); void LAPACK_cgelq2( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int *info ); void LAPACK_zgelq2( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* tau, 
lapack_complex_double* work, lapack_int *info ); void LAPACK_slarfb( char* side, char* trans, char* direct, char* storev, lapack_int* m, lapack_int* n, lapack_int* k, const float* v, lapack_int* ldv, const float* t, lapack_int* ldt, float* c, lapack_int* ldc, float* work, lapack_int* ldwork ); void LAPACK_dlarfb( char* side, char* trans, char* direct, char* storev, lapack_int* m, lapack_int* n, lapack_int* k, const double* v, lapack_int* ldv, const double* t, lapack_int* ldt, double* c, lapack_int* ldc, double* work, lapack_int* ldwork ); void LAPACK_clarfb( char* side, char* trans, char* direct, char* storev, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_float* v, lapack_int* ldv, const lapack_complex_float* t, lapack_int* ldt, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int* ldwork ); void LAPACK_zlarfb( char* side, char* trans, char* direct, char* storev, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_double* v, lapack_int* ldv, const lapack_complex_double* t, lapack_int* ldt, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int* ldwork ); void LAPACK_slarfg( lapack_int* n, float* alpha, float* x, lapack_int* incx, float* tau ); void LAPACK_dlarfg( lapack_int* n, double* alpha, double* x, lapack_int* incx, double* tau ); void LAPACK_clarfg( lapack_int* n, lapack_complex_float* alpha, lapack_complex_float* x, lapack_int* incx, lapack_complex_float* tau ); void LAPACK_zlarfg( lapack_int* n, lapack_complex_double* alpha, lapack_complex_double* x, lapack_int* incx, lapack_complex_double* tau ); void LAPACK_slarft( char* direct, char* storev, lapack_int* n, lapack_int* k, const float* v, lapack_int* ldv, const float* tau, float* t, lapack_int* ldt ); void LAPACK_dlarft( char* direct, char* storev, lapack_int* n, lapack_int* k, const double* v, lapack_int* ldv, const double* tau, double* t, lapack_int* ldt ); void LAPACK_clarft( char* direct, char* storev, lapack_int* 
n, lapack_int* k, const lapack_complex_float* v, lapack_int* ldv, const lapack_complex_float* tau, lapack_complex_float* t, lapack_int* ldt ); void LAPACK_zlarft( char* direct, char* storev, lapack_int* n, lapack_int* k, const lapack_complex_double* v, lapack_int* ldv, const lapack_complex_double* tau, lapack_complex_double* t, lapack_int* ldt ); void LAPACK_slarfx( char* side, lapack_int* m, lapack_int* n, const float* v, float* tau, float* c, lapack_int* ldc, float* work ); void LAPACK_dlarfx( char* side, lapack_int* m, lapack_int* n, const double* v, double* tau, double* c, lapack_int* ldc, double* work ); void LAPACK_clarfx( char* side, lapack_int* m, lapack_int* n, const lapack_complex_float* v, lapack_complex_float* tau, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work ); void LAPACK_zlarfx( char* side, lapack_int* m, lapack_int* n, const lapack_complex_double* v, lapack_complex_double* tau, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work ); void LAPACK_slatms( lapack_int* m, lapack_int* n, char* dist, lapack_int* iseed, char* sym, float* d, lapack_int* mode, float* cond, float* dmax, lapack_int* kl, lapack_int* ku, char* pack, float* a, lapack_int* lda, float* work, lapack_int *info ); void LAPACK_dlatms( lapack_int* m, lapack_int* n, char* dist, lapack_int* iseed, char* sym, double* d, lapack_int* mode, double* cond, double* dmax, lapack_int* kl, lapack_int* ku, char* pack, double* a, lapack_int* lda, double* work, lapack_int *info ); void LAPACK_clatms( lapack_int* m, lapack_int* n, char* dist, lapack_int* iseed, char* sym, float* d, lapack_int* mode, float* cond, float* dmax, lapack_int* kl, lapack_int* ku, char* pack, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* work, lapack_int *info ); void LAPACK_zlatms( lapack_int* m, lapack_int* n, char* dist, lapack_int* iseed, char* sym, double* d, lapack_int* mode, double* cond, double* dmax, lapack_int* kl, lapack_int* ku, char* pack, 
lapack_complex_double* a, lapack_int* lda, lapack_complex_double* work, lapack_int *info ); void LAPACK_slag2d( lapack_int* m, lapack_int* n, const float* sa, lapack_int* ldsa, double* a, lapack_int* lda, lapack_int *info ); void LAPACK_dlag2s( lapack_int* m, lapack_int* n, const double* a, lapack_int* lda, float* sa, lapack_int* ldsa, lapack_int *info ); void LAPACK_clag2z( lapack_int* m, lapack_int* n, const lapack_complex_float* sa, lapack_int* ldsa, lapack_complex_double* a, lapack_int* lda, lapack_int *info ); void LAPACK_zlag2c( lapack_int* m, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, lapack_complex_float* sa, lapack_int* ldsa, lapack_int *info ); void LAPACK_slauum( char* uplo, lapack_int* n, float* a, lapack_int* lda, lapack_int *info ); void LAPACK_dlauum( char* uplo, lapack_int* n, double* a, lapack_int* lda, lapack_int *info ); void LAPACK_clauum( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int *info ); void LAPACK_zlauum( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int *info ); void LAPACK_slagge( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const float* d, float* a, lapack_int* lda, lapack_int* iseed, float* work, lapack_int *info ); void LAPACK_dlagge( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const double* d, double* a, lapack_int* lda, lapack_int* iseed, double* work, lapack_int *info ); void LAPACK_clagge( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const float* d, lapack_complex_float* a, lapack_int* lda, lapack_int* iseed, lapack_complex_float* work, lapack_int *info ); void LAPACK_zlagge( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const double* d, lapack_complex_double* a, lapack_int* lda, lapack_int* iseed, lapack_complex_double* work, lapack_int *info ); void LAPACK_slaset( char* uplo, lapack_int* m, lapack_int* n, float* alpha, float* beta, float* a, lapack_int* lda ); void 
LAPACK_dlaset( char* uplo, lapack_int* m, lapack_int* n, double* alpha, double* beta, double* a, lapack_int* lda ); void LAPACK_claset( char* uplo, lapack_int* m, lapack_int* n, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* a, lapack_int* lda ); void LAPACK_zlaset( char* uplo, lapack_int* m, lapack_int* n, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* a, lapack_int* lda ); void LAPACK_slasrt( char* id, lapack_int* n, float* d, lapack_int *info ); void LAPACK_dlasrt( char* id, lapack_int* n, double* d, lapack_int *info ); void LAPACK_claghe( lapack_int* n, lapack_int* k, const float* d, lapack_complex_float* a, lapack_int* lda, lapack_int* iseed, lapack_complex_float* work, lapack_int *info ); void LAPACK_zlaghe( lapack_int* n, lapack_int* k, const double* d, lapack_complex_double* a, lapack_int* lda, lapack_int* iseed, lapack_complex_double* work, lapack_int *info ); void LAPACK_slagsy( lapack_int* n, lapack_int* k, const float* d, float* a, lapack_int* lda, lapack_int* iseed, float* work, lapack_int *info ); void LAPACK_dlagsy( lapack_int* n, lapack_int* k, const double* d, double* a, lapack_int* lda, lapack_int* iseed, double* work, lapack_int *info ); void LAPACK_clagsy( lapack_int* n, lapack_int* k, const float* d, lapack_complex_float* a, lapack_int* lda, lapack_int* iseed, lapack_complex_float* work, lapack_int *info ); void LAPACK_zlagsy( lapack_int* n, lapack_int* k, const double* d, lapack_complex_double* a, lapack_int* lda, lapack_int* iseed, lapack_complex_double* work, lapack_int *info ); void LAPACK_slapmr( lapack_logical* forwrd, lapack_int* m, lapack_int* n, float* x, lapack_int* ldx, lapack_int* k ); void LAPACK_dlapmr( lapack_logical* forwrd, lapack_int* m, lapack_int* n, double* x, lapack_int* ldx, lapack_int* k ); void LAPACK_clapmr( lapack_logical* forwrd, lapack_int* m, lapack_int* n, lapack_complex_float* x, lapack_int* ldx, lapack_int* k ); void LAPACK_zlapmr( 
lapack_logical* forwrd, lapack_int* m, lapack_int* n, lapack_complex_double* x, lapack_int* ldx, lapack_int* k ); float LAPACK_slapy2( float* x, float* y ); double LAPACK_dlapy2( double* x, double* y ); float LAPACK_slapy3( float* x, float* y, float* z ); double LAPACK_dlapy3( double* x, double* y, double* z ); void LAPACK_slartgp( float* f, float* g, float* cs, float* sn, float* r ); void LAPACK_dlartgp( double* f, double* g, double* cs, double* sn, double* r ); void LAPACK_slartgs( float* x, float* y, float* sigma, float* cs, float* sn ); void LAPACK_dlartgs( double* x, double* y, double* sigma, double* cs, double* sn ); // LAPACK 3.3.0 void LAPACK_cbbcsd( char* jobu1, char* jobu2, char* jobv1t, char* jobv2t, char* trans, lapack_int* m, lapack_int* p, lapack_int* q, float* theta, float* phi, lapack_complex_float* u1, lapack_int* ldu1, lapack_complex_float* u2, lapack_int* ldu2, lapack_complex_float* v1t, lapack_int* ldv1t, lapack_complex_float* v2t, lapack_int* ldv2t, float* b11d, float* b11e, float* b12d, float* b12e, float* b21d, float* b21e, float* b22d, float* b22e, float* rwork, lapack_int* lrwork , lapack_int *info ); void LAPACK_cheswapr( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* i1, lapack_int* i2 ); void LAPACK_chetri2( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int* lwork , lapack_int *info ); void LAPACK_chetri2x( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int* nb , lapack_int *info ); void LAPACK_chetrs2( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* work , lapack_int *info ); void LAPACK_csyconv( char* uplo, char* way, lapack_int* n, lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* work , 
lapack_int *info ); void LAPACK_csyswapr( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* i1, lapack_int* i2 ); void LAPACK_csytri2( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int* lwork , lapack_int *info ); void LAPACK_csytri2x( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int* nb , lapack_int *info ); void LAPACK_csytrs2( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* work , lapack_int *info ); void LAPACK_cunbdb( char* trans, char* signs, lapack_int* m, lapack_int* p, lapack_int* q, lapack_complex_float* x11, lapack_int* ldx11, lapack_complex_float* x12, lapack_int* ldx12, lapack_complex_float* x21, lapack_int* ldx21, lapack_complex_float* x22, lapack_int* ldx22, float* theta, float* phi, lapack_complex_float* taup1, lapack_complex_float* taup2, lapack_complex_float* tauq1, lapack_complex_float* tauq2, lapack_complex_float* work, lapack_int* lwork , lapack_int *info ); void LAPACK_cuncsd( char* jobu1, char* jobu2, char* jobv1t, char* jobv2t, char* trans, char* signs, lapack_int* m, lapack_int* p, lapack_int* q, lapack_complex_float* x11, lapack_int* ldx11, lapack_complex_float* x12, lapack_int* ldx12, lapack_complex_float* x21, lapack_int* ldx21, lapack_complex_float* x22, lapack_int* ldx22, float* theta, lapack_complex_float* u1, lapack_int* ldu1, lapack_complex_float* u2, lapack_int* ldu2, lapack_complex_float* v1t, lapack_int* ldv1t, lapack_complex_float* v2t, lapack_int* ldv2t, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* lrwork, lapack_int* iwork , lapack_int *info ); void LAPACK_dbbcsd( char* jobu1, char* jobu2, char* jobv1t, char* jobv2t, char* trans, lapack_int* m, lapack_int* p, lapack_int* q, double* theta, 
double* phi, double* u1, lapack_int* ldu1, double* u2, lapack_int* ldu2, double* v1t, lapack_int* ldv1t, double* v2t, lapack_int* ldv2t, double* b11d, double* b11e, double* b12d, double* b12e, double* b21d, double* b21e, double* b22d, double* b22e, double* work, lapack_int* lwork , lapack_int *info ); void LAPACK_dorbdb( char* trans, char* signs, lapack_int* m, lapack_int* p, lapack_int* q, double* x11, lapack_int* ldx11, double* x12, lapack_int* ldx12, double* x21, lapack_int* ldx21, double* x22, lapack_int* ldx22, double* theta, double* phi, double* taup1, double* taup2, double* tauq1, double* tauq2, double* work, lapack_int* lwork , lapack_int *info ); void LAPACK_dorcsd( char* jobu1, char* jobu2, char* jobv1t, char* jobv2t, char* trans, char* signs, lapack_int* m, lapack_int* p, lapack_int* q, double* x11, lapack_int* ldx11, double* x12, lapack_int* ldx12, double* x21, lapack_int* ldx21, double* x22, lapack_int* ldx22, double* theta, double* u1, lapack_int* ldu1, double* u2, lapack_int* ldu2, double* v1t, lapack_int* ldv1t, double* v2t, lapack_int* ldv2t, double* work, lapack_int* lwork, lapack_int* iwork , lapack_int *info ); void LAPACK_dsyconv( char* uplo, char* way, lapack_int* n, double* a, lapack_int* lda, const lapack_int* ipiv, double* work , lapack_int *info ); void LAPACK_dsyswapr( char* uplo, lapack_int* n, double* a, lapack_int* i1, lapack_int* i2 ); void LAPACK_dsytri2( char* uplo, lapack_int* n, double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int* lwork , lapack_int *info ); void LAPACK_dsytri2x( char* uplo, lapack_int* n, double* a, lapack_int* lda, const lapack_int* ipiv, double* work, lapack_int* nb , lapack_int *info ); void LAPACK_dsytrs2( char* uplo, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, const lapack_int* ipiv, double* b, lapack_int* ldb, double* work , lapack_int *info ); void LAPACK_sbbcsd( char* jobu1, char* jobu2, char* jobv1t, char* jobv2t, char* trans, lapack_int* m, 
lapack_int* p, lapack_int* q, float* theta, float* phi, float* u1, lapack_int* ldu1, float* u2, lapack_int* ldu2, float* v1t, lapack_int* ldv1t, float* v2t, lapack_int* ldv2t, float* b11d, float* b11e, float* b12d, float* b12e, float* b21d, float* b21e, float* b22d, float* b22e, float* work, lapack_int* lwork , lapack_int *info ); void LAPACK_sorbdb( char* trans, char* signs, lapack_int* m, lapack_int* p, lapack_int* q, float* x11, lapack_int* ldx11, float* x12, lapack_int* ldx12, float* x21, lapack_int* ldx21, float* x22, lapack_int* ldx22, float* theta, float* phi, float* taup1, float* taup2, float* tauq1, float* tauq2, float* work, lapack_int* lwork , lapack_int *info ); void LAPACK_sorcsd( char* jobu1, char* jobu2, char* jobv1t, char* jobv2t, char* trans, char* signs, lapack_int* m, lapack_int* p, lapack_int* q, float* x11, lapack_int* ldx11, float* x12, lapack_int* ldx12, float* x21, lapack_int* ldx21, float* x22, lapack_int* ldx22, float* theta, float* u1, lapack_int* ldu1, float* u2, lapack_int* ldu2, float* v1t, lapack_int* ldv1t, float* v2t, lapack_int* ldv2t, float* work, lapack_int* lwork, lapack_int* iwork , lapack_int *info ); void LAPACK_ssyconv( char* uplo, char* way, lapack_int* n, float* a, lapack_int* lda, const lapack_int* ipiv, float* work , lapack_int *info ); void LAPACK_ssyswapr( char* uplo, lapack_int* n, float* a, lapack_int* i1, lapack_int* i2 ); void LAPACK_ssytri2( char* uplo, lapack_int* n, float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int* lwork , lapack_int *info ); void LAPACK_ssytri2x( char* uplo, lapack_int* n, float* a, lapack_int* lda, const lapack_int* ipiv, float* work, lapack_int* nb , lapack_int *info ); void LAPACK_ssytrs2( char* uplo, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, const lapack_int* ipiv, float* b, lapack_int* ldb, float* work , lapack_int *info ); void LAPACK_zbbcsd( char* jobu1, char* jobu2, char* jobv1t, char* jobv2t, char* trans, lapack_int* m, 
lapack_int* p, lapack_int* q, double* theta, double* phi, lapack_complex_double* u1, lapack_int* ldu1, lapack_complex_double* u2, lapack_int* ldu2, lapack_complex_double* v1t, lapack_int* ldv1t, lapack_complex_double* v2t, lapack_int* ldv2t, double* b11d, double* b11e, double* b12d, double* b12e, double* b21d, double* b21e, double* b22d, double* b22e, double* rwork, lapack_int* lrwork , lapack_int *info ); void LAPACK_zheswapr( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* i1, lapack_int* i2 ); void LAPACK_zhetri2( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int* lwork , lapack_int *info ); void LAPACK_zhetri2x( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int* nb , lapack_int *info ); void LAPACK_zhetrs2( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* work , lapack_int *info ); void LAPACK_zsyconv( char* uplo, char* way, lapack_int* n, lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* work , lapack_int *info ); void LAPACK_zsyswapr( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* i1, lapack_int* i2 ); void LAPACK_zsytri2( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int* lwork , lapack_int *info ); void LAPACK_zsytri2x( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int* nb , lapack_int *info ); void LAPACK_zsytrs2( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* work , lapack_int *info ); 
void LAPACK_zunbdb( char* trans, char* signs, lapack_int* m, lapack_int* p, lapack_int* q, lapack_complex_double* x11, lapack_int* ldx11, lapack_complex_double* x12, lapack_int* ldx12, lapack_complex_double* x21, lapack_int* ldx21, lapack_complex_double* x22, lapack_int* ldx22, double* theta, double* phi, lapack_complex_double* taup1, lapack_complex_double* taup2, lapack_complex_double* tauq1, lapack_complex_double* tauq2, lapack_complex_double* work, lapack_int* lwork , lapack_int *info ); void LAPACK_zuncsd( char* jobu1, char* jobu2, char* jobv1t, char* jobv2t, char* trans, char* signs, lapack_int* m, lapack_int* p, lapack_int* q, lapack_complex_double* x11, lapack_int* ldx11, lapack_complex_double* x12, lapack_int* ldx12, lapack_complex_double* x21, lapack_int* ldx21, lapack_complex_double* x22, lapack_int* ldx22, double* theta, lapack_complex_double* u1, lapack_int* ldu1, lapack_complex_double* u2, lapack_int* ldu2, lapack_complex_double* v1t, lapack_int* ldv1t, lapack_complex_double* v2t, lapack_int* ldv2t, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* lrwork, lapack_int* iwork , lapack_int *info ); // LAPACK 3.4.0 void LAPACK_sgemqrt( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* nb, const float* v, lapack_int* ldv, const float* t, lapack_int* ldt, float* c, lapack_int* ldc, float* work, lapack_int *info ); void LAPACK_dgemqrt( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* nb, const double* v, lapack_int* ldv, const double* t, lapack_int* ldt, double* c, lapack_int* ldc, double* work, lapack_int *info ); void LAPACK_cgemqrt( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* nb, const lapack_complex_float* v, lapack_int* ldv, const lapack_complex_float* t, lapack_int* ldt, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int *info ); void LAPACK_zgemqrt( char* side, char* trans, lapack_int* m, 
lapack_int* n, lapack_int* k, lapack_int* nb, const lapack_complex_double* v, lapack_int* ldv, const lapack_complex_double* t, lapack_int* ldt, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int *info ); void LAPACK_sgeqrt( lapack_int* m, lapack_int* n, lapack_int* nb, float* a, lapack_int* lda, float* t, lapack_int* ldt, float* work, lapack_int *info ); void LAPACK_dgeqrt( lapack_int* m, lapack_int* n, lapack_int* nb, double* a, lapack_int* lda, double* t, lapack_int* ldt, double* work, lapack_int *info ); void LAPACK_cgeqrt( lapack_int* m, lapack_int* n, lapack_int* nb, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* t, lapack_int* ldt, lapack_complex_float* work, lapack_int *info ); void LAPACK_zgeqrt( lapack_int* m, lapack_int* n, lapack_int* nb, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* t, lapack_int* ldt, lapack_complex_double* work, lapack_int *info ); void LAPACK_sgeqrt2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* t, lapack_int* ldt, lapack_int *info ); void LAPACK_dgeqrt2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* t, lapack_int* ldt, lapack_int *info ); void LAPACK_cgeqrt2( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* t, lapack_int* ldt, lapack_int *info ); void LAPACK_zgeqrt2( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* t, lapack_int* ldt, lapack_int *info ); void LAPACK_sgeqrt3( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* t, lapack_int* ldt, lapack_int *info ); void LAPACK_dgeqrt3( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* t, lapack_int* ldt, lapack_int *info ); void LAPACK_cgeqrt3( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* t, lapack_int* ldt, lapack_int *info ); void LAPACK_zgeqrt3( lapack_int* m, lapack_int* n, lapack_complex_double* a, 
lapack_int* lda, lapack_complex_double* t, lapack_int* ldt, lapack_int *info ); void LAPACK_stpmqrt( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, lapack_int* nb, const float* v, lapack_int* ldv, const float* t, lapack_int* ldt, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* work, lapack_int *info ); void LAPACK_dtpmqrt( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, lapack_int* nb, const double* v, lapack_int* ldv, const double* t, lapack_int* ldt, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* work, lapack_int *info ); void LAPACK_ctpmqrt( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, lapack_int* nb, const lapack_complex_float* v, lapack_int* ldv, const lapack_complex_float* t, lapack_int* ldt, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* work, lapack_int *info ); void LAPACK_ztpmqrt( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, lapack_int* nb, const lapack_complex_double* v, lapack_int* ldv, const lapack_complex_double* t, lapack_int* ldt, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* work, lapack_int *info ); void LAPACK_dtpqrt( lapack_int* m, lapack_int* n, lapack_int* l, lapack_int* nb, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* t, lapack_int* ldt, double* work, lapack_int *info ); void LAPACK_ctpqrt( lapack_int* m, lapack_int* n, lapack_int* l, lapack_int* nb, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* t, lapack_int* ldt, lapack_complex_float* work, lapack_int *info ); void LAPACK_ztpqrt( lapack_int* m, lapack_int* n, lapack_int* l, lapack_int* nb, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* t, lapack_int* 
ldt, lapack_complex_double* work, lapack_int *info ); void LAPACK_stpqrt2( lapack_int* m, lapack_int* n, lapack_int* l, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* t, lapack_int* ldt, lapack_int *info ); void LAPACK_dtpqrt2( lapack_int* m, lapack_int* n, lapack_int* l, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* t, lapack_int* ldt, lapack_int *info ); void LAPACK_ctpqrt2( lapack_int* m, lapack_int* n, lapack_int* l, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* t, lapack_int* ldt, lapack_int *info ); void LAPACK_ztpqrt2( lapack_int* m, lapack_int* n, lapack_int* l, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* t, lapack_int* ldt, lapack_int *info ); void LAPACK_stprfb( char* side, char* trans, char* direct, char* storev, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, const float* v, lapack_int* ldv, const float* t, lapack_int* ldt, float* a, lapack_int* lda, float* b, lapack_int* ldb, const float* work, lapack_int* ldwork ); void LAPACK_dtprfb( char* side, char* trans, char* direct, char* storev, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, const double* v, lapack_int* ldv, const double* t, lapack_int* ldt, double* a, lapack_int* lda, double* b, lapack_int* ldb, const double* work, lapack_int* ldwork ); void LAPACK_ctprfb( char* side, char* trans, char* direct, char* storev, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, const lapack_complex_float* v, lapack_int* ldv, const lapack_complex_float* t, lapack_int* ldt, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, const float* work, lapack_int* ldwork ); void LAPACK_ztprfb( char* side, char* trans, char* direct, char* storev, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, const lapack_complex_double* v, lapack_int* ldv, const lapack_complex_double* t, lapack_int* ldt, 
lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, const double* work, lapack_int* ldwork ); // LAPACK 3.5.0 void LAPACK_ssysv_rook( char* uplo, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, lapack_int* ipiv, float* b, lapack_int* ldb, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dsysv_rook( char* uplo, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, lapack_int* ipiv, double* b, lapack_int* ldb, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_csysv_rook( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zsysv_rook( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_csyr( char* uplo, lapack_int* n, lapack_complex_float* alpha, const lapack_complex_float* x, lapack_int* incx, lapack_complex_float* a, lapack_int* lda ); void LAPACK_zsyr( char* uplo, lapack_int* n, lapack_complex_double* alpha, const lapack_complex_double* x, lapack_int* incx, lapack_complex_double* a, lapack_int* lda ); void LAPACK_ilaver( const lapack_int* vers_major, const lapack_int* vers_minor, const lapack_int* vers_patch ); #ifdef __cplusplus } #endif /* __cplusplus */ #endif /* _LAPACKE_H_ */ ================================================ FILE: ext/nmatrix_lapacke/lapacke/include/lapacke_config.h ================================================ /***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
****************************************************************************** * Contents: Native C interface to LAPACK * Author: Intel Corporation * Generated May, 2011 *****************************************************************************/ #ifndef _LAPACKE_CONFIG_H_ #define _LAPACKE_CONFIG_H_ #ifdef __cplusplus #if defined(LAPACK_COMPLEX_CPP) #include #endif extern "C" { #endif /* __cplusplus */ #include #ifndef lapack_int #if defined(LAPACK_ILP64) #define lapack_int long #else #define lapack_int int #endif #endif #ifndef lapack_logical #define lapack_logical lapack_int #endif #ifndef LAPACK_COMPLEX_CUSTOM #if defined(LAPACK_COMPLEX_STRUCTURE) typedef struct { float real, imag; } _lapack_complex_float; typedef struct { double real, imag; } _lapack_complex_double; #define lapack_complex_float _lapack_complex_float #define lapack_complex_double _lapack_complex_double #define lapack_complex_float_real(z) ((z).real) #define lapack_complex_float_imag(z) ((z).imag) #define lapack_complex_double_real(z) ((z).real) #define lapack_complex_double_imag(z) ((z).imag) #elif defined(LAPACK_COMPLEX_C99) #include #define lapack_complex_float float _Complex #define lapack_complex_double double _Complex #define lapack_complex_float_real(z) (creal(z)) #define lapack_complex_float_imag(z) (cimag(z)) #define lapack_complex_double_real(z) (creal(z)) #define lapack_complex_double_imag(z) (cimag(z)) #elif defined(LAPACK_COMPLEX_CPP) #define lapack_complex_float std::complex #define lapack_complex_double std::complex #define lapack_complex_float_real(z) ((z).real()) #define lapack_complex_float_imag(z) ((z).imag()) #define lapack_complex_double_real(z) ((z).real()) #define lapack_complex_double_imag(z) ((z).imag()) #else #include #define lapack_complex_float float _Complex #define lapack_complex_double double _Complex #define lapack_complex_float_real(z) (creal(z)) #define lapack_complex_float_imag(z) (cimag(z)) #define lapack_complex_double_real(z) (creal(z)) #define 
lapack_complex_double_imag(z) (cimag(z)) #endif lapack_complex_float lapack_make_complex_float( float re, float im ); lapack_complex_double lapack_make_complex_double( double re, double im ); #endif #ifndef LAPACK_malloc #define LAPACK_malloc( size ) malloc( size ) #endif #ifndef LAPACK_free #define LAPACK_free( p ) free( p ) #endif #ifdef __cplusplus } #endif /* __cplusplus */ #endif /* _LAPACKE_CONFIG_H_ */ ================================================ FILE: ext/nmatrix_lapacke/lapacke/include/lapacke_mangling.h ================================================ #ifndef LAPACK_HEADER_INCLUDED #define LAPACK_HEADER_INCLUDED #ifndef LAPACK_GLOBAL #if defined(LAPACK_GLOBAL_PATTERN_LC) || defined(ADD_) #define LAPACK_GLOBAL(lcname,UCNAME) lcname##_ #elif defined(LAPACK_GLOBAL_PATTERN_UC) || defined(UPPER) #define LAPACK_GLOBAL(lcname,UCNAME) UCNAME #elif defined(LAPACK_GLOBAL_PATTERN_MC) || defined(NOCHANGE) #define LAPACK_GLOBAL(lcname,UCNAME) lcname #else #define LAPACK_GLOBAL(lcname,UCNAME) lcname##_ #endif #endif #endif ================================================ FILE: ext/nmatrix_lapacke/lapacke/include/lapacke_mangling_with_flags.h ================================================ #ifndef LAPACK_HEADER_INCLUDED #define LAPACK_HEADER_INCLUDED #ifndef LAPACK_GLOBAL #if defined(LAPACK_GLOBAL_PATTERN_LC) || defined(ADD_) #define LAPACK_GLOBAL(lcname,UCNAME) lcname##_ #elif defined(LAPACK_GLOBAL_PATTERN_UC) || defined(UPPER) #define LAPACK_GLOBAL(lcname,UCNAME) UCNAME #elif defined(LAPACK_GLOBAL_PATTERN_MC) || defined(NOCHANGE) #define LAPACK_GLOBAL(lcname,UCNAME) lcname #else #define LAPACK_GLOBAL(lcname,UCNAME) lcname##_ #endif #endif #endif ================================================ FILE: ext/nmatrix_lapacke/lapacke/include/lapacke_utils.h ================================================ /***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK utility functions * Author: Intel Corporation * Created in January, 2010 *****************************************************************************/ #ifndef _LAPACKE_UTILS_H_ #define _LAPACKE_UTILS_H_ #include "lapacke.h" #ifdef __cplusplus extern "C" { #endif /* __cplusplus */ #ifndef ABS #define ABS(x) (((x) < 0) ? -(x) : (x)) #endif #ifndef MAX #define MAX(x,y) (((x) > (y)) ? (x) : (y)) #endif #ifndef MIN #define MIN(x,y) (((x) < (y)) ? 
(x) : (y)) #endif #ifndef MAX3 #define MAX3(x,y,z) (((x) > MAX(y,z)) ? (x) : MAX(y,z)) #endif #ifndef MIN3 #define MIN3(x,y,z) (((x) < MIN(y,z)) ? (x) : MIN(y,z)) #endif #define IS_S_NONZERO(x) ( (x) < 0 || (x) > 0 ) #define IS_D_NONZERO(x) ( (x) < 0 || (x) > 0 ) #define IS_C_NONZERO(x) ( IS_S_NONZERO(*((float*)&x)) || \ IS_S_NONZERO(*(((float*)&x)+1)) ) #define IS_Z_NONZERO(x) ( IS_D_NONZERO(*((double*)&x)) || \ IS_D_NONZERO(*(((double*)&x)+1)) ) /* Error handler */ void LAPACKE_xerbla( const char *name, lapack_int info ); /* Compare two chars (case-insensitive) */ lapack_logical LAPACKE_lsame( char ca, char cb ); /* Functions to convert column-major to row-major 2d arrays and vice versa. */ void LAPACKE_cgb_trans( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_float *in, lapack_int ldin, lapack_complex_float *out, lapack_int ldout ); void LAPACKE_cge_trans( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float* in, lapack_int ldin, lapack_complex_float* out, lapack_int ldout ); void LAPACKE_cgg_trans( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float* in, lapack_int ldin, lapack_complex_float* out, lapack_int ldout ); void LAPACKE_chb_trans( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_float *in, lapack_int ldin, lapack_complex_float *out, lapack_int ldout ); void LAPACKE_che_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_float *in, lapack_int ldin, lapack_complex_float *out, lapack_int ldout ); void LAPACKE_chp_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_float *in, lapack_complex_float *out ); void LAPACKE_chs_trans( int matrix_order, lapack_int n, const lapack_complex_float *in, lapack_int ldin, lapack_complex_float *out, lapack_int ldout ); void LAPACKE_cpb_trans( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_float *in, lapack_int ldin, 
lapack_complex_float *out, lapack_int ldout ); void LAPACKE_cpf_trans( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_float *in, lapack_complex_float *out ); void LAPACKE_cpo_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_float *in, lapack_int ldin, lapack_complex_float *out, lapack_int ldout ); void LAPACKE_cpp_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_float *in, lapack_complex_float *out ); void LAPACKE_csp_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_float *in, lapack_complex_float *out ); void LAPACKE_csy_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_float *in, lapack_int ldin, lapack_complex_float *out, lapack_int ldout ); void LAPACKE_ctb_trans( int matrix_order, char uplo, char diag, lapack_int n, lapack_int kd, const lapack_complex_float *in, lapack_int ldin, lapack_complex_float *out, lapack_int ldout ); void LAPACKE_ctf_trans( int matrix_order, char transr, char uplo, char diag, lapack_int n, const lapack_complex_float *in, lapack_complex_float *out ); void LAPACKE_ctp_trans( int matrix_order, char uplo, char diag, lapack_int n, const lapack_complex_float *in, lapack_complex_float *out ); void LAPACKE_ctr_trans( int matrix_order, char uplo, char diag, lapack_int n, const lapack_complex_float *in, lapack_int ldin, lapack_complex_float *out, lapack_int ldout ); void LAPACKE_dgb_trans( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const double *in, lapack_int ldin, double *out, lapack_int ldout ); void LAPACKE_dge_trans( int matrix_order, lapack_int m, lapack_int n, const double* in, lapack_int ldin, double* out, lapack_int ldout ); void LAPACKE_dgg_trans( int matrix_order, lapack_int m, lapack_int n, const double* in, lapack_int ldin, double* out, lapack_int ldout ); void LAPACKE_dhs_trans( int matrix_order, lapack_int n, const double *in, lapack_int ldin, double *out, lapack_int ldout ); void 
LAPACKE_dpb_trans( int matrix_order, char uplo, lapack_int n, lapack_int kd, const double *in, lapack_int ldin, double *out, lapack_int ldout ); void LAPACKE_dpf_trans( int matrix_order, char transr, char uplo, lapack_int n, const double *in, double *out ); void LAPACKE_dpo_trans( int matrix_order, char uplo, lapack_int n, const double *in, lapack_int ldin, double *out, lapack_int ldout ); void LAPACKE_dpp_trans( int matrix_order, char uplo, lapack_int n, const double *in, double *out ); void LAPACKE_dsb_trans( int matrix_order, char uplo, lapack_int n, lapack_int kd, const double *in, lapack_int ldin, double *out, lapack_int ldout ); void LAPACKE_dsp_trans( int matrix_order, char uplo, lapack_int n, const double *in, double *out ); void LAPACKE_dsy_trans( int matrix_order, char uplo, lapack_int n, const double *in, lapack_int ldin, double *out, lapack_int ldout ); void LAPACKE_dtb_trans( int matrix_order, char uplo, char diag, lapack_int n, lapack_int kd, const double *in, lapack_int ldin, double *out, lapack_int ldout ); void LAPACKE_dtf_trans( int matrix_order, char transr, char uplo, char diag, lapack_int n, const double *in, double *out ); void LAPACKE_dtp_trans( int matrix_order, char uplo, char diag, lapack_int n, const double *in, double *out ); void LAPACKE_dtr_trans( int matrix_order, char uplo, char diag, lapack_int n, const double *in, lapack_int ldin, double *out, lapack_int ldout ); void LAPACKE_sgb_trans( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const float *in, lapack_int ldin, float *out, lapack_int ldout ); void LAPACKE_sge_trans( int matrix_order, lapack_int m, lapack_int n, const float* in, lapack_int ldin, float* out, lapack_int ldout ); void LAPACKE_sgg_trans( int matrix_order, lapack_int m, lapack_int n, const float* in, lapack_int ldin, float* out, lapack_int ldout ); void LAPACKE_shs_trans( int matrix_order, lapack_int n, const float *in, lapack_int ldin, float *out, lapack_int ldout ); void 
LAPACKE_spb_trans( int matrix_order, char uplo, lapack_int n, lapack_int kd, const float *in, lapack_int ldin, float *out, lapack_int ldout ); void LAPACKE_spf_trans( int matrix_order, char transr, char uplo, lapack_int n, const float *in, float *out ); void LAPACKE_spo_trans( int matrix_order, char uplo, lapack_int n, const float *in, lapack_int ldin, float *out, lapack_int ldout ); void LAPACKE_spp_trans( int matrix_order, char uplo, lapack_int n, const float *in, float *out ); void LAPACKE_ssb_trans( int matrix_order, char uplo, lapack_int n, lapack_int kd, const float *in, lapack_int ldin, float *out, lapack_int ldout ); void LAPACKE_ssp_trans( int matrix_order, char uplo, lapack_int n, const float *in, float *out ); void LAPACKE_ssy_trans( int matrix_order, char uplo, lapack_int n, const float *in, lapack_int ldin, float *out, lapack_int ldout ); void LAPACKE_stb_trans( int matrix_order, char uplo, char diag, lapack_int n, lapack_int kd, const float *in, lapack_int ldin, float *out, lapack_int ldout ); void LAPACKE_stf_trans( int matrix_order, char transr, char uplo, char diag, lapack_int n, const float *in, float *out ); void LAPACKE_stp_trans( int matrix_order, char uplo, char diag, lapack_int n, const float *in, float *out ); void LAPACKE_str_trans( int matrix_order, char uplo, char diag, lapack_int n, const float *in, lapack_int ldin, float *out, lapack_int ldout ); void LAPACKE_zgb_trans( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_double *in, lapack_int ldin, lapack_complex_double *out, lapack_int ldout ); void LAPACKE_zge_trans( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double* in, lapack_int ldin, lapack_complex_double* out, lapack_int ldout ); void LAPACKE_zgg_trans( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double* in, lapack_int ldin, lapack_complex_double* out, lapack_int ldout ); void LAPACKE_zhb_trans( int matrix_order, char uplo, lapack_int n, 
lapack_int kd, const lapack_complex_double *in, lapack_int ldin, lapack_complex_double *out, lapack_int ldout ); void LAPACKE_zhe_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_double *in, lapack_int ldin, lapack_complex_double *out, lapack_int ldout ); void LAPACKE_zhp_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_double *in, lapack_complex_double *out ); void LAPACKE_zhs_trans( int matrix_order, lapack_int n, const lapack_complex_double *in, lapack_int ldin, lapack_complex_double *out, lapack_int ldout ); void LAPACKE_zpb_trans( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_double *in, lapack_int ldin, lapack_complex_double *out, lapack_int ldout ); void LAPACKE_zpf_trans( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_double *in, lapack_complex_double *out ); void LAPACKE_zpo_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_double *in, lapack_int ldin, lapack_complex_double *out, lapack_int ldout ); void LAPACKE_zpp_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_double *in, lapack_complex_double *out ); void LAPACKE_zsp_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_double *in, lapack_complex_double *out ); void LAPACKE_zsy_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_double *in, lapack_int ldin, lapack_complex_double *out, lapack_int ldout ); void LAPACKE_ztb_trans( int matrix_order, char uplo, char diag, lapack_int n, lapack_int kd, const lapack_complex_double *in, lapack_int ldin, lapack_complex_double *out, lapack_int ldout ); void LAPACKE_ztf_trans( int matrix_order, char transr, char uplo, char diag, lapack_int n, const lapack_complex_double *in, lapack_complex_double *out ); void LAPACKE_ztp_trans( int matrix_order, char uplo, char diag, lapack_int n, const lapack_complex_double *in, lapack_complex_double *out ); void LAPACKE_ztr_trans( int 
matrix_order, char uplo, char diag, lapack_int n, const lapack_complex_double *in, lapack_int ldin, lapack_complex_double *out, lapack_int ldout ); /* NaN checkers */ #define LAPACK_SISNAN( x ) ( x != x ) #define LAPACK_DISNAN( x ) ( x != x ) #define LAPACK_CISNAN( x ) ( LAPACK_SISNAN(*((float*) &x)) || \ LAPACK_SISNAN(*(((float*) &x)+1)) ) #define LAPACK_ZISNAN( x ) ( LAPACK_DISNAN(*((double*)&x)) || \ LAPACK_DISNAN(*(((double*)&x)+1)) ) /* NaN checkers for vectors */ lapack_logical LAPACKE_c_nancheck( lapack_int n, const lapack_complex_float *x, lapack_int incx ); lapack_logical LAPACKE_d_nancheck( lapack_int n, const double *x, lapack_int incx ); lapack_logical LAPACKE_s_nancheck( lapack_int n, const float *x, lapack_int incx ); lapack_logical LAPACKE_z_nancheck( lapack_int n, const lapack_complex_double *x, lapack_int incx ); /* NaN checkers for matrices */ lapack_logical LAPACKE_cgb_nancheck( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_float *ab, lapack_int ldab ); lapack_logical LAPACKE_cge_nancheck( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float *a, lapack_int lda ); lapack_logical LAPACKE_cgg_nancheck( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float *a, lapack_int lda ); lapack_logical LAPACKE_cgt_nancheck( lapack_int n, const lapack_complex_float *dl, const lapack_complex_float *d, const lapack_complex_float *du ); lapack_logical LAPACKE_chb_nancheck( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_float* ab, lapack_int ldab ); lapack_logical LAPACKE_che_nancheck( int matrix_order, char uplo, lapack_int n, const lapack_complex_float *a, lapack_int lda ); lapack_logical LAPACKE_chp_nancheck( lapack_int n, const lapack_complex_float *ap ); lapack_logical LAPACKE_chs_nancheck( int matrix_order, lapack_int n, const lapack_complex_float *a, lapack_int lda ); lapack_logical LAPACKE_cpb_nancheck( int matrix_order, char uplo, 
lapack_int n, lapack_int kd, const lapack_complex_float* ab, lapack_int ldab ); lapack_logical LAPACKE_cpf_nancheck( lapack_int n, const lapack_complex_float *a ); lapack_logical LAPACKE_cpo_nancheck( int matrix_order, char uplo, lapack_int n, const lapack_complex_float *a, lapack_int lda ); lapack_logical LAPACKE_cpp_nancheck( lapack_int n, const lapack_complex_float *ap ); lapack_logical LAPACKE_cpt_nancheck( lapack_int n, const float *d, const lapack_complex_float *e ); lapack_logical LAPACKE_csp_nancheck( lapack_int n, const lapack_complex_float *ap ); lapack_logical LAPACKE_cst_nancheck( lapack_int n, const lapack_complex_float *d, const lapack_complex_float *e ); lapack_logical LAPACKE_csy_nancheck( int matrix_order, char uplo, lapack_int n, const lapack_complex_float *a, lapack_int lda ); lapack_logical LAPACKE_ctb_nancheck( int matrix_order, char uplo, char diag, lapack_int n, lapack_int kd, const lapack_complex_float* ab, lapack_int ldab ); lapack_logical LAPACKE_ctf_nancheck( int matrix_order, char transr, char uplo, char diag, lapack_int n, const lapack_complex_float *a ); lapack_logical LAPACKE_ctp_nancheck( int matrix_order, char uplo, char diag, lapack_int n, const lapack_complex_float *ap ); lapack_logical LAPACKE_ctr_nancheck( int matrix_order, char uplo, char diag, lapack_int n, const lapack_complex_float *a, lapack_int lda ); lapack_logical LAPACKE_dgb_nancheck( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const double *ab, lapack_int ldab ); lapack_logical LAPACKE_dge_nancheck( int matrix_order, lapack_int m, lapack_int n, const double *a, lapack_int lda ); lapack_logical LAPACKE_dgg_nancheck( int matrix_order, lapack_int m, lapack_int n, const double *a, lapack_int lda ); lapack_logical LAPACKE_dgt_nancheck( lapack_int n, const double *dl, const double *d, const double *du ); lapack_logical LAPACKE_dhs_nancheck( int matrix_order, lapack_int n, const double *a, lapack_int lda ); lapack_logical LAPACKE_dpb_nancheck( 
int matrix_order, char uplo, lapack_int n, lapack_int kd, const double* ab, lapack_int ldab ); lapack_logical LAPACKE_dpf_nancheck( lapack_int n, const double *a ); lapack_logical LAPACKE_dpo_nancheck( int matrix_order, char uplo, lapack_int n, const double *a, lapack_int lda ); lapack_logical LAPACKE_dpp_nancheck( lapack_int n, const double *ap ); lapack_logical LAPACKE_dpt_nancheck( lapack_int n, const double *d, const double *e ); lapack_logical LAPACKE_dsb_nancheck( int matrix_order, char uplo, lapack_int n, lapack_int kd, const double* ab, lapack_int ldab ); lapack_logical LAPACKE_dsp_nancheck( lapack_int n, const double *ap ); lapack_logical LAPACKE_dst_nancheck( lapack_int n, const double *d, const double *e ); lapack_logical LAPACKE_dsy_nancheck( int matrix_order, char uplo, lapack_int n, const double *a, lapack_int lda ); lapack_logical LAPACKE_dtb_nancheck( int matrix_order, char uplo, char diag, lapack_int n, lapack_int kd, const double* ab, lapack_int ldab ); lapack_logical LAPACKE_dtf_nancheck( int matrix_order, char transr, char uplo, char diag, lapack_int n, const double *a ); lapack_logical LAPACKE_dtp_nancheck( int matrix_order, char uplo, char diag, lapack_int n, const double *ap ); lapack_logical LAPACKE_dtr_nancheck( int matrix_order, char uplo, char diag, lapack_int n, const double *a, lapack_int lda ); lapack_logical LAPACKE_sgb_nancheck( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const float *ab, lapack_int ldab ); lapack_logical LAPACKE_sge_nancheck( int matrix_order, lapack_int m, lapack_int n, const float *a, lapack_int lda ); lapack_logical LAPACKE_sgg_nancheck( int matrix_order, lapack_int m, lapack_int n, const float *a, lapack_int lda ); lapack_logical LAPACKE_sgt_nancheck( lapack_int n, const float *dl, const float *d, const float *du ); lapack_logical LAPACKE_shs_nancheck( int matrix_order, lapack_int n, const float *a, lapack_int lda ); lapack_logical LAPACKE_spb_nancheck( int matrix_order, char 
uplo, lapack_int n, lapack_int kd, const float* ab, lapack_int ldab ); lapack_logical LAPACKE_spf_nancheck( lapack_int n, const float *a ); lapack_logical LAPACKE_spo_nancheck( int matrix_order, char uplo, lapack_int n, const float *a, lapack_int lda ); lapack_logical LAPACKE_spp_nancheck( lapack_int n, const float *ap ); lapack_logical LAPACKE_spt_nancheck( lapack_int n, const float *d, const float *e ); lapack_logical LAPACKE_ssb_nancheck( int matrix_order, char uplo, lapack_int n, lapack_int kd, const float* ab, lapack_int ldab ); lapack_logical LAPACKE_ssp_nancheck( lapack_int n, const float *ap ); lapack_logical LAPACKE_sst_nancheck( lapack_int n, const float *d, const float *e ); lapack_logical LAPACKE_ssy_nancheck( int matrix_order, char uplo, lapack_int n, const float *a, lapack_int lda ); lapack_logical LAPACKE_stb_nancheck( int matrix_order, char uplo, char diag, lapack_int n, lapack_int kd, const float* ab, lapack_int ldab ); lapack_logical LAPACKE_stf_nancheck( int matrix_order, char transr, char uplo, char diag, lapack_int n, const float *a ); lapack_logical LAPACKE_stp_nancheck( int matrix_order, char uplo, char diag, lapack_int n, const float *ap ); lapack_logical LAPACKE_str_nancheck( int matrix_order, char uplo, char diag, lapack_int n, const float *a, lapack_int lda ); lapack_logical LAPACKE_zgb_nancheck( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_double *ab, lapack_int ldab ); lapack_logical LAPACKE_zge_nancheck( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double *a, lapack_int lda ); lapack_logical LAPACKE_zgg_nancheck( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double *a, lapack_int lda ); lapack_logical LAPACKE_zgt_nancheck( lapack_int n, const lapack_complex_double *dl, const lapack_complex_double *d, const lapack_complex_double *du ); lapack_logical LAPACKE_zhb_nancheck( int matrix_order, char uplo, lapack_int n, lapack_int kd, const 
lapack_complex_double* ab, lapack_int ldab ); lapack_logical LAPACKE_zhe_nancheck( int matrix_order, char uplo, lapack_int n, const lapack_complex_double *a, lapack_int lda ); lapack_logical LAPACKE_zhp_nancheck( lapack_int n, const lapack_complex_double *ap ); lapack_logical LAPACKE_zhs_nancheck( int matrix_order, lapack_int n, const lapack_complex_double *a, lapack_int lda ); lapack_logical LAPACKE_zpb_nancheck( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_double* ab, lapack_int ldab ); lapack_logical LAPACKE_zpf_nancheck( lapack_int n, const lapack_complex_double *a ); lapack_logical LAPACKE_zpo_nancheck( int matrix_order, char uplo, lapack_int n, const lapack_complex_double *a, lapack_int lda ); lapack_logical LAPACKE_zpp_nancheck( lapack_int n, const lapack_complex_double *ap ); lapack_logical LAPACKE_zpt_nancheck( lapack_int n, const double *d, const lapack_complex_double *e ); lapack_logical LAPACKE_zsp_nancheck( lapack_int n, const lapack_complex_double *ap ); lapack_logical LAPACKE_zst_nancheck( lapack_int n, const lapack_complex_double *d, const lapack_complex_double *e ); lapack_logical LAPACKE_zsy_nancheck( int matrix_order, char uplo, lapack_int n, const lapack_complex_double *a, lapack_int lda ); lapack_logical LAPACKE_ztb_nancheck( int matrix_order, char uplo, char diag, lapack_int n, lapack_int kd, const lapack_complex_double* ab, lapack_int ldab ); lapack_logical LAPACKE_ztf_nancheck( int matrix_order, char transr, char uplo, char diag, lapack_int n, const lapack_complex_double *a ); lapack_logical LAPACKE_ztp_nancheck( int matrix_order, char uplo, char diag, lapack_int n, const lapack_complex_double *ap ); lapack_logical LAPACKE_ztr_nancheck( int matrix_order, char uplo, char diag, lapack_int n, const lapack_complex_double *a, lapack_int lda ); #ifdef __cplusplus } #endif /* __cplusplus */ #endif /* _LAPACKE_UTILS_H_ */ ================================================ FILE: 
ext/nmatrix_lapacke/lapacke/src/lapacke_cgeev.c ================================================ /***************************************************************************** Copyright (c) 2011, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
***************************************************************************** * Contents: Native high-level C interface to LAPACK function cgeev * Author: Intel Corporation * Generated November, 2011 *****************************************************************************/ #include "lapacke_utils.h" lapack_int LAPACKE_cgeev( int matrix_order, char jobvl, char jobvr, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* w, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr ) { lapack_int info = 0; lapack_int lwork = -1; float* rwork = NULL; lapack_complex_float* work = NULL; lapack_complex_float work_query; if( matrix_order != LAPACK_COL_MAJOR && matrix_order != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_cgeev", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ if( LAPACKE_cge_nancheck( matrix_order, n, n, a, lda ) ) { return -5; } #endif /* Allocate memory for working array(s) */ rwork = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,2*n) ); if( rwork == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_0; } /* Query optimal working array(s) size */ info = LAPACKE_cgeev_work( matrix_order, jobvl, jobvr, n, a, lda, w, vl, ldvl, vr, ldvr, &work_query, lwork, rwork ); if( info != 0 ) { goto exit_level_1; } lwork = LAPACK_C2INT( work_query ); /* Allocate memory for work arrays */ work = (lapack_complex_float*) LAPACKE_malloc( sizeof(lapack_complex_float) * lwork ); if( work == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_1; } /* Call middle-level interface */ info = LAPACKE_cgeev_work( matrix_order, jobvl, jobvr, n, a, lda, w, vl, ldvl, vr, ldvr, work, lwork, rwork ); /* Release memory and exit */ LAPACKE_free( work ); exit_level_1: LAPACKE_free( rwork ); exit_level_0: if( info == LAPACK_WORK_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_cgeev", info ); } return info; } ================================================ FILE: 
ext/nmatrix_lapacke/lapacke/src/lapacke_cgeev_work.c ================================================ /***************************************************************************** Copyright (c) 2011, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*****************************************************************************
* Contents: Native middle-level C interface to LAPACK function cgeev
* Author: Intel Corporation
* Generated November, 2011
*****************************************************************************/
#include "lapacke_utils.h"

/* Middle-level interface to CGEEV: caller supplies the work arrays.
 * For column-major input the Fortran routine is called directly; for
 * row-major input the matrices are transposed into temporary column-major
 * buffers, the routine is called, and the results are transposed back.
 * Negative info from LAPACK is shifted by -1 because matrix_order is an
 * extra leading argument of the C interface. */
lapack_int LAPACKE_cgeev_work( int matrix_order, char jobvl, char jobvr,
                               lapack_int n, lapack_complex_float* a,
                               lapack_int lda, lapack_complex_float* w,
                               lapack_complex_float* vl, lapack_int ldvl,
                               lapack_complex_float* vr, lapack_int ldvr,
                               lapack_complex_float* work, lapack_int lwork,
                               float* rwork )
{
    lapack_int info = 0;
    if( matrix_order == LAPACK_COL_MAJOR ) {
        /* Call LAPACK function and adjust info */
        LAPACK_cgeev( &jobvl, &jobvr, &n, a, &lda, w, vl, &ldvl, vr, &ldvr,
                      work, &lwork, rwork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
    } else if( matrix_order == LAPACK_ROW_MAJOR ) {
        /* Column-major temporaries are square (n-by-n); MAX guards n == 0 */
        lapack_int lda_t = MAX(1,n);
        lapack_int ldvl_t = MAX(1,n);
        lapack_int ldvr_t = MAX(1,n);
        lapack_complex_float* a_t = NULL;
        lapack_complex_float* vl_t = NULL;
        lapack_complex_float* vr_t = NULL;
        /* Check leading dimension(s); error codes name the offending
         * argument position of the C interface */
        if( lda < n ) {
            info = -6;
            LAPACKE_xerbla( "LAPACKE_cgeev_work", info );
            return info;
        }
        if( ldvl < n ) {
            info = -9;
            LAPACKE_xerbla( "LAPACKE_cgeev_work", info );
            return info;
        }
        if( ldvr < n ) {
            info = -11;
            LAPACKE_xerbla( "LAPACKE_cgeev_work", info );
            return info;
        }
        /* Query optimal working array(s) size if requested; no transposes
         * are needed for a size query, so return immediately */
        if( lwork == -1 ) {
            LAPACK_cgeev( &jobvl, &jobvr, &n, a, &lda_t, w, vl, &ldvl_t, vr,
                          &ldvr_t, work, &lwork, rwork, &info );
            return (info < 0) ? (info - 1) : info;
        }
        /* Allocate memory for temporary array(s); vl_t/vr_t are only
         * allocated when the corresponding eigenvectors are requested */
        a_t = (lapack_complex_float*)
            LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) );
        if( a_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_0;
        }
        if( LAPACKE_lsame( jobvl, 'v' ) ) {
            vl_t = (lapack_complex_float*)
                LAPACKE_malloc( sizeof(lapack_complex_float) *
                                ldvl_t * MAX(1,n) );
            if( vl_t == NULL ) {
                info = LAPACK_TRANSPOSE_MEMORY_ERROR;
                goto exit_level_1;
            }
        }
        if( LAPACKE_lsame( jobvr, 'v' ) ) {
            vr_t = (lapack_complex_float*)
                LAPACKE_malloc( sizeof(lapack_complex_float) *
                                ldvr_t * MAX(1,n) );
            if( vr_t == NULL ) {
                info = LAPACK_TRANSPOSE_MEMORY_ERROR;
                goto exit_level_2;
            }
        }
        /* Transpose input matrices */
        LAPACKE_cge_trans( matrix_order, n, n, a, lda, a_t, lda_t );
        /* Call LAPACK function and adjust info */
        LAPACK_cgeev( &jobvl, &jobvr, &n, a_t, &lda_t, w, vl_t, &ldvl_t, vr_t,
                      &ldvr_t, work, &lwork, rwork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
        /* Transpose output matrices back into the caller's row-major
         * storage (eigenvector arrays only if they were computed) */
        LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
        if( LAPACKE_lsame( jobvl, 'v' ) ) {
            LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, vl_t, ldvl_t, vl,
                               ldvl );
        }
        if( LAPACKE_lsame( jobvr, 'v' ) ) {
            LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, vr_t, ldvr_t, vr,
                               ldvr );
        }
        /* Release memory and exit; the labels deliberately fall through so
         * that an allocation failure frees exactly the buffers already
         * allocated at that point */
        if( LAPACKE_lsame( jobvr, 'v' ) ) {
            LAPACKE_free( vr_t );
        }
exit_level_2:
        if( LAPACKE_lsame( jobvl, 'v' ) ) {
            LAPACKE_free( vl_t );
        }
exit_level_1:
        LAPACKE_free( a_t );
exit_level_0:
        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
            LAPACKE_xerbla( "LAPACKE_cgeev_work", info );
        }
    } else {
        info = -1;
        LAPACKE_xerbla( "LAPACKE_cgeev_work", info );
    }
    return info;
}
================================================ FILE: ext/nmatrix_lapacke/lapacke/src/lapacke_cgeqrf.c ================================================
/*****************************************************************************
  Copyright (c) 2014, Intel Corp.
  All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
***************************************************************************** * Contents: Native high-level C interface to LAPACK function cgeqrf * Author: Intel Corporation * Generated November 2015 *****************************************************************************/ #include "lapacke_utils.h" lapack_int LAPACKE_cgeqrf( int matrix_layout, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau ) { lapack_int info = 0; lapack_int lwork = -1; lapack_complex_float* work = NULL; lapack_complex_float work_query; if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_cgeqrf", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { return -4; } #endif /* Query optimal working array(s) size */ info = LAPACKE_cgeqrf_work( matrix_layout, m, n, a, lda, tau, &work_query, lwork ); if( info != 0 ) { goto exit_level_0; } lwork = LAPACK_C2INT( work_query ); /* Allocate memory for work arrays */ work = (lapack_complex_float*) LAPACKE_malloc( sizeof(lapack_complex_float) * lwork ); if( work == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_0; } /* Call middle-level interface */ info = LAPACKE_cgeqrf_work( matrix_layout, m, n, a, lda, tau, work, lwork ); /* Release memory and exit */ LAPACKE_free( work ); exit_level_0: if( info == LAPACK_WORK_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_cgeqrf", info ); } return info; } ================================================ FILE: ext/nmatrix_lapacke/lapacke/src/lapacke_cgeqrf_work.c ================================================ /***************************************************************************** Copyright (c) 2014, Intel Corp. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*****************************************************************************
* Contents: Native middle-level C interface to LAPACK function cgeqrf
* Author: Intel Corporation
* Generated November 2015
*****************************************************************************/
#include "lapacke_utils.h"

/* Middle-level interface to CGEQRF: caller supplies the work array.
 * For column-major input the Fortran routine is called directly; for
 * row-major input the matrix is transposed into a temporary column-major
 * buffer, the routine is called, and the result is transposed back.
 * Negative info from LAPACK is shifted by -1 because matrix_layout is an
 * extra leading argument of the C interface. */
lapack_int LAPACKE_cgeqrf_work( int matrix_layout, lapack_int m, lapack_int n,
                                lapack_complex_float* a, lapack_int lda,
                                lapack_complex_float* tau,
                                lapack_complex_float* work, lapack_int lwork )
{
    lapack_int info = 0;
    if( matrix_layout == LAPACK_COL_MAJOR ) {
        /* Call LAPACK function and adjust info */
        LAPACK_cgeqrf( &m, &n, a, &lda, tau, work, &lwork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
        /* Column-major temporary has leading dimension m; MAX guards m == 0 */
        lapack_int lda_t = MAX(1,m);
        lapack_complex_float* a_t = NULL;
        /* Check leading dimension(s); -5 names lda, the fifth argument of
         * the C interface */
        if( lda < n ) {
            info = -5;
            LAPACKE_xerbla( "LAPACKE_cgeqrf_work", info );
            return info;
        }
        /* Query optimal working array(s) size if requested; no transpose
         * is needed for a size query, so return immediately */
        if( lwork == -1 ) {
            LAPACK_cgeqrf( &m, &n, a, &lda_t, tau, work, &lwork, &info );
            return (info < 0) ? (info - 1) : info;
        }
        /* Allocate memory for temporary array(s) */
        a_t = (lapack_complex_float*)
            LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) );
        if( a_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_0;
        }
        /* Transpose input matrices */
        LAPACKE_cge_trans( matrix_layout, m, n, a, lda, a_t, lda_t );
        /* Call LAPACK function and adjust info */
        LAPACK_cgeqrf( &m, &n, a_t, &lda_t, tau, work, &lwork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
        /* Transpose output matrices back into the caller's row-major
         * storage */
        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda );
        /* Release memory and exit */
        LAPACKE_free( a_t );
exit_level_0:
        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
            LAPACKE_xerbla( "LAPACKE_cgeqrf_work", info );
        }
    } else {
        info = -1;
        LAPACKE_xerbla( "LAPACKE_cgeqrf_work", info );
    }
    return info;
}
================================================ FILE: ext/nmatrix_lapacke/lapacke/src/lapacke_cgesdd.c ================================================
/*****************************************************************************
  Copyright (c) 2011, Intel Corp.
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of Intel Corporation nor the names of its contributors
      may be used to endorse or promote products derived from this software
      without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************** * Contents: Native high-level C interface to LAPACK function cgesdd * Author: Intel Corporation * Generated November, 2011 *****************************************************************************/ #include "lapacke_utils.h" lapack_int LAPACKE_cgesdd( int matrix_order, char jobz, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* vt, lapack_int ldvt ) { lapack_int info = 0; lapack_int lwork = -1; /* Additional scalars declarations for work arrays */ size_t lrwork; lapack_int* iwork = NULL; float* rwork = NULL; lapack_complex_float* work = NULL; lapack_complex_float work_query; if( matrix_order != LAPACK_COL_MAJOR && matrix_order != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_cgesdd", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ if( LAPACKE_cge_nancheck( matrix_order, m, n, a, lda ) ) { return -5; } #endif /* Additional scalars initializations for work arrays */ if( LAPACKE_lsame( jobz, 'n' ) ) { lrwork = MAX(1,5*MIN(m,n)); } else { lrwork = (size_t)5*MAX(1,MIN(m,n))*MAX(1,MIN(m,n))+7*MIN(m,n); } /* 
Allocate memory for working array(s) */ iwork = (lapack_int*) LAPACKE_malloc( sizeof(lapack_int) * MAX(1,8*MIN(m,n)) ); if( iwork == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_0; } rwork = (float*)LAPACKE_malloc( sizeof(float) * lrwork ); if( rwork == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_1; } /* Query optimal working array(s) size */ info = LAPACKE_cgesdd_work( matrix_order, jobz, m, n, a, lda, s, u, ldu, vt, ldvt, &work_query, lwork, rwork, iwork ); if( info != 0 ) { goto exit_level_2; } lwork = LAPACK_C2INT( work_query ); /* Allocate memory for work arrays */ work = (lapack_complex_float*) LAPACKE_malloc( sizeof(lapack_complex_float) * lwork ); if( work == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_2; } /* Call middle-level interface */ info = LAPACKE_cgesdd_work( matrix_order, jobz, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, rwork, iwork ); /* Release memory and exit */ LAPACKE_free( work ); exit_level_2: LAPACKE_free( rwork ); exit_level_1: LAPACKE_free( iwork ); exit_level_0: if( info == LAPACK_WORK_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_cgesdd", info ); } return info; } ================================================ FILE: ext/nmatrix_lapacke/lapacke/src/lapacke_cgesdd_work.c ================================================ /***************************************************************************** Copyright (c) 2011, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
* Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function cgesdd * Author: Intel Corporation * Generated November, 2011 *****************************************************************************/ #include "lapacke_utils.h" lapack_int LAPACKE_cgesdd_work( int matrix_order, char jobz, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* vt, lapack_int ldvt, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int* iwork ) { lapack_int info = 0; if( matrix_order == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_cgesdd( &jobz, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, rwork, iwork, &info ); if( info < 0 ) { info = info - 1; } } else if( matrix_order == LAPACK_ROW_MAJOR ) { lapack_int nrows_u = ( LAPACKE_lsame( jobz, 'a' ) || LAPACKE_lsame( jobz, 's' ) || ( LAPACKE_lsame( jobz, 'o' ) && m=n) ) ) { vt_t = 
(lapack_complex_float*) LAPACKE_malloc( sizeof(lapack_complex_float) * ldvt_t * MAX(1,n) ); if( vt_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_2; } } /* Transpose input matrices */ LAPACKE_cge_trans( matrix_order, m, n, a, lda, a_t, lda_t ); /* Call LAPACK function and adjust info */ LAPACK_cgesdd( &jobz, &m, &n, a_t, &lda_t, s, u_t, &ldu_t, vt_t, &ldvt_t, work, &lwork, rwork, iwork, &info ); if( info < 0 ) { info = info - 1; } /* Transpose output matrices */ LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); if( LAPACKE_lsame( jobz, 'a' ) || LAPACKE_lsame( jobz, 's' ) || ( LAPACKE_lsame( jobz, 'o' ) && (m=n) ) ) { LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_vt, n, vt_t, ldvt_t, vt, ldvt ); } /* Release memory and exit */ if( LAPACKE_lsame( jobz, 'a' ) || LAPACKE_lsame( jobz, 's' ) || ( LAPACKE_lsame( jobz, 'o' ) && (m>=n) ) ) { LAPACKE_free( vt_t ); } exit_level_2: if( LAPACKE_lsame( jobz, 'a' ) || LAPACKE_lsame( jobz, 's' ) || ( LAPACKE_lsame( jobz, 'o' ) && (m=n) ) ) { vt_t = (double*) LAPACKE_malloc( sizeof(double) * ldvt_t * MAX(1,n) ); if( vt_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_2; } } /* Transpose input matrices */ LAPACKE_dge_trans( matrix_order, m, n, a, lda, a_t, lda_t ); /* Call LAPACK function and adjust info */ LAPACK_dgesdd( &jobz, &m, &n, a_t, &lda_t, s, u_t, &ldu_t, vt_t, &ldvt_t, work, &lwork, iwork, &info ); if( info < 0 ) { info = info - 1; } /* Transpose output matrices */ LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); if( LAPACKE_lsame( jobz, 'a' ) || LAPACKE_lsame( jobz, 's' ) || ( LAPACKE_lsame( jobz, 'o' ) && (m=n) ) ) { LAPACKE_dge_trans( LAPACK_COL_MAJOR, nrows_vt, n, vt_t, ldvt_t, vt, ldvt ); } /* Release memory and exit */ if( LAPACKE_lsame( jobz, 'a' ) || LAPACKE_lsame( jobz, 's' ) || ( LAPACKE_lsame( jobz, 'o' ) && (m>=n) ) ) { LAPACKE_free( vt_t ); } exit_level_2: if( LAPACKE_lsame( jobz, 'a' ) || LAPACKE_lsame( jobz, 's' ) || ( LAPACKE_lsame( jobz, 
'o' ) && (m=n) ) ) { vt_t = (float*)LAPACKE_malloc( sizeof(float) * ldvt_t * MAX(1,n) ); if( vt_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_2; } } /* Transpose input matrices */ LAPACKE_sge_trans( matrix_order, m, n, a, lda, a_t, lda_t ); /* Call LAPACK function and adjust info */ LAPACK_sgesdd( &jobz, &m, &n, a_t, &lda_t, s, u_t, &ldu_t, vt_t, &ldvt_t, work, &lwork, iwork, &info ); if( info < 0 ) { info = info - 1; } /* Transpose output matrices */ LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); if( LAPACKE_lsame( jobz, 'a' ) || LAPACKE_lsame( jobz, 's' ) || ( LAPACKE_lsame( jobz, 'o' ) && (m=n) ) ) { LAPACKE_sge_trans( LAPACK_COL_MAJOR, nrows_vt, n, vt_t, ldvt_t, vt, ldvt ); } /* Release memory and exit */ if( LAPACKE_lsame( jobz, 'a' ) || LAPACKE_lsame( jobz, 's' ) || ( LAPACKE_lsame( jobz, 'o' ) && (m>=n) ) ) { LAPACKE_free( vt_t ); } exit_level_2: if( LAPACKE_lsame( jobz, 'a' ) || LAPACKE_lsame( jobz, 's' ) || ( LAPACKE_lsame( jobz, 'o' ) && (m=n) ) ) { vt_t = (lapack_complex_double*) LAPACKE_malloc( sizeof(lapack_complex_double) * ldvt_t * MAX(1,n) ); if( vt_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_2; } } /* Transpose input matrices */ LAPACKE_zge_trans( matrix_order, m, n, a, lda, a_t, lda_t ); /* Call LAPACK function and adjust info */ LAPACK_zgesdd( &jobz, &m, &n, a_t, &lda_t, s, u_t, &ldu_t, vt_t, &ldvt_t, work, &lwork, rwork, iwork, &info ); if( info < 0 ) { info = info - 1; } /* Transpose output matrices */ LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); if( LAPACKE_lsame( jobz, 'a' ) || LAPACKE_lsame( jobz, 's' ) || ( LAPACKE_lsame( jobz, 'o' ) && (m=n) ) ) { LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_vt, n, vt_t, ldvt_t, vt, ldvt ); } /* Release memory and exit */ if( LAPACKE_lsame( jobz, 'a' ) || LAPACKE_lsame( jobz, 's' ) || ( LAPACKE_lsame( jobz, 'o' ) && (m>=n) ) ) { LAPACKE_free( vt_t ); } exit_level_2: if( LAPACKE_lsame( jobz, 'a' ) || LAPACKE_lsame( jobz, 's' ) 
|| ( LAPACKE_lsame( jobz, 'o' ) && (m 0 ) ? incx : -incx ; for( i = 0; i < n*inc; i+=inc ) { if( LAPACK_CISNAN( x[i] ) ) return (lapack_logical) 1; } return (lapack_logical) 0; } ================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_cge_nancheck.c ================================================ /***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Check a matrix for NaN entries. */ lapack_logical LAPACKE_cge_nancheck( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float *a, lapack_int lda ) { lapack_int i, j; if( a == NULL ) return (lapack_logical) 0; if( matrix_order == LAPACK_COL_MAJOR ) { for( j = 0; j < n; j++ ) { for( i = 0; i < MIN( m, lda ); i++ ) { if( LAPACK_CISNAN( a[i+(size_t)j*lda] ) ) return (lapack_logical) 1; } } } else if ( matrix_order == LAPACK_ROW_MAJOR ) { for( i = 0; i < m; i++ ) { for( j = 0; j < MIN( n, lda ); j++ ) { if( LAPACK_CISNAN( a[(size_t)i*lda+j] ) ) return (lapack_logical) 1; } } } return (lapack_logical) 0; } ================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_cge_trans.c ================================================ /***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Converts input general matrix from row-major(C) to column-major(Fortran) * layout or vice versa. 
*/ void LAPACKE_cge_trans( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float* in, lapack_int ldin, lapack_complex_float* out, lapack_int ldout ) { lapack_int i, j, x, y; if( in == NULL || out == NULL ) return; if( matrix_order == LAPACK_COL_MAJOR ) { x = n; y = m; } else if ( matrix_order == LAPACK_ROW_MAJOR ) { x = m; y = n; } else { /* Unknown input layout */ return; } /* In case of incorrect m, n, ldin or ldout the function does nothing */ for( i = 0; i < MIN( y, ldin ); i++ ) { for( j = 0; j < MIN( x, ldout ); j++ ) { out[ (size_t)i*ldout + j ] = in[ (size_t)j*ldin + i ]; } } } ================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_cpo_nancheck.c ================================================ /***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Check a matrix for NaN entries. */ lapack_logical LAPACKE_cpo_nancheck( int matrix_order, char uplo, lapack_int n, const lapack_complex_float *a, lapack_int lda ) { return LAPACKE_ctr_nancheck( matrix_order, uplo, 'n', n, a, lda ); } ================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_cpo_trans.c ================================================ /***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
* Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Converts input symmetric matrix from row-major(C) to column-major(Fortran) * layout or vice versa. */ void LAPACKE_cpo_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_float *in, lapack_int ldin, lapack_complex_float *out, lapack_int ldout ) { LAPACKE_ctr_trans( matrix_order, uplo, 'n', n, in, ldin, out, ldout ); } ================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_ctr_nancheck.c ================================================ /***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Check a matrix for NaN entries. 
*/ lapack_logical LAPACKE_ctr_nancheck( int matrix_order, char uplo, char diag, lapack_int n, const lapack_complex_float *a, lapack_int lda ) { lapack_int i, j, st; lapack_logical colmaj, lower, unit; if( a == NULL ) return (lapack_logical) 0; colmaj = ( matrix_order == LAPACK_COL_MAJOR ); lower = LAPACKE_lsame( uplo, 'l' ); unit = LAPACKE_lsame( diag, 'u' ); if( ( !colmaj && ( matrix_order != LAPACK_ROW_MAJOR ) ) || ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { /* Just exit if any of input parameters are wrong */ return (lapack_logical) 0; } if( unit ) { /* If unit, then don't touch diagonal, start from 1st column or row */ st = 1; } else { /* If non-unit, then check diagonal also, starting from [0,0] */ st = 0; } /* Since col_major upper and row_major lower are equal, * and col_major lower and row_major upper are equals too - * using one code for equal cases. XOR( colmaj, upper ) */ if( ( colmaj || lower ) && !( colmaj && lower ) ) { for( j = st; j < n; j++ ) { for( i = 0; i < MIN( j+1-st, lda ); i++ ) { if( LAPACK_CISNAN( a[i+j*lda] ) ) return (lapack_logical) 1; } } } else { for( j = 0; j < n-st; j++ ) { for( i = j+st; i < MIN( n, lda ); i++ ) { if( LAPACK_CISNAN( a[i+j*lda] ) ) return (lapack_logical) 1; } } } return (lapack_logical) 0; } ================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_ctr_trans.c ================================================ /***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Converts input triangular matrix from row-major(C) to column-major(Fortran) * layout or vice versa. 
*/ void LAPACKE_ctr_trans( int matrix_order, char uplo, char diag, lapack_int n, const lapack_complex_float *in, lapack_int ldin, lapack_complex_float *out, lapack_int ldout ) { lapack_int i, j, st; lapack_logical colmaj, lower, unit; if( in == NULL || out == NULL ) return ; colmaj = ( matrix_order == LAPACK_COL_MAJOR ); lower = LAPACKE_lsame( uplo, 'l' ); unit = LAPACKE_lsame( diag, 'u' ); if( ( !colmaj && ( matrix_order != LAPACK_ROW_MAJOR ) ) || ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { /* Just exit if any of input parameters are wrong */ return; } if( unit ) { /* If unit, then don't touch diagonal, start from 1st column or row */ st = 1; } else { /* If non-unit, then check diagonal also, starting from [0,0] */ st = 0; } /* Perform conversion: * Since col_major upper and row_major lower are equal, * and col_major lower and row_major upper are equals too - * using one code for equal cases. XOR( colmaj, upper ) */ if( ( colmaj || lower ) && !( colmaj && lower ) ) { for( j = st; j < MIN( n, ldout ); j++ ) { for( i = 0; i < MIN( j+1-st, ldin ); i++ ) { out[ j+i*ldout ] = in[ i+j*ldin ]; } } } else { for( j = 0; j < MIN( n-st, ldout ); j++ ) { for( i = j+st; i < MIN( n, ldin ); i++ ) { out[ j+i*ldout ] = in[ i+j*ldin ]; } } } } ================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_d_nancheck.c ================================================ /***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Check a vector for NaN entries. */ lapack_logical LAPACKE_d_nancheck( lapack_int n, const double *x, lapack_int incx ) { lapack_int i, inc; if( incx == 0 ) return (lapack_logical) LAPACK_DISNAN( x[0] ); inc = ( incx > 0 ) ? 
incx : -incx ; for( i = 0; i < n*inc; i+=inc ) { if( LAPACK_DISNAN( x[i] ) ) return (lapack_logical) 1; } return (lapack_logical) 0; } ================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_dge_nancheck.c ================================================ /***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Check a matrix for NaN entries. */ lapack_logical LAPACKE_dge_nancheck( int matrix_order, lapack_int m, lapack_int n, const double *a, lapack_int lda ) { lapack_int i, j; if( a == NULL ) return (lapack_logical) 0; if( matrix_order == LAPACK_COL_MAJOR ) { for( j = 0; j < n; j++ ) { for( i = 0; i < MIN( m, lda ); i++ ) { if( LAPACK_DISNAN( a[i+(size_t)j*lda] ) ) return (lapack_logical) 1; } } } else if ( matrix_order == LAPACK_ROW_MAJOR ) { for( i = 0; i < m; i++ ) { for( j = 0; j < MIN( n, lda ); j++ ) { if( LAPACK_DISNAN( a[(size_t)i*lda+j] ) ) return (lapack_logical) 1; } } } return (lapack_logical) 0; } ================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_dge_trans.c ================================================ /***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Converts input general matrix from row-major(C) to column-major(Fortran) * layout or vice versa. 
 */
/* Transposes/copies the m-by-n matrix `in` (leading dimension `ldin`) into
 * `out` (leading dimension `ldout`), swapping between row-major and
 * column-major storage.  NULL pointers or an unknown `matrix_order` make
 * the function return without touching `out`. */
void LAPACKE_dge_trans( int matrix_order, lapack_int m, lapack_int n,
                        const double* in, lapack_int ldin,
                        double* out, lapack_int ldout )
{
    lapack_int i, j, x, y;

    if( in == NULL || out == NULL ) return;

    /* x/y are the logical extents as seen through the OUTPUT layout */
    if( matrix_order == LAPACK_COL_MAJOR ) {
        x = n;
        y = m;
    } else if ( matrix_order == LAPACK_ROW_MAJOR ) {
        x = m;
        y = n;
    } else {
        /* Unknown input layout */
        return;
    }

    /* In case of incorrect m, n, ldin or ldout the function does nothing */
    for( i = 0; i < MIN( y, ldin ); i++ ) {
        for( j = 0; j < MIN( x, ldout ); j++ ) {
            out[ (size_t)i*ldout + j ] = in[ (size_t)j*ldin + i ];
        }
    }
}

================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_dpo_nancheck.c ================================================
/*****************************************************************************
  Copyright (c) 2010, Intel Corp.
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of Intel Corporation nor the names of its contributors
      may be used to endorse or promote products derived from this software
      without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED.
  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY
  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************
* Contents: Native C interface to LAPACK utility function
* Author: Intel Corporation
* Created in February, 2010
*****************************************************************************/
#include "lapacke_utils.h"

/* Check a matrix for NaN entries.
 *
 * Positive-definite (po) storage checks only the triangle selected by
 * `uplo`; delegating with diag = 'n' makes the triangular checker include
 * the diagonal entries as well. */
lapack_logical LAPACKE_dpo_nancheck( int matrix_order, char uplo,
                                     lapack_int n,
                                     const double *a,
                                     lapack_int lda )
{
    return LAPACKE_dtr_nancheck( matrix_order, uplo, 'n', n, a, lda );
}

================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_dpo_trans.c ================================================
/*****************************************************************************
  Copyright (c) 2010, Intel Corp.
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of Intel Corporation nor the names of its contributors
      may be used to endorse or promote products derived from this software
      without specific prior written permission.
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.
******************************************************************************
* Contents: Native C interface to LAPACK utility function
* Author: Intel Corporation
* Created in February, 2010
*****************************************************************************/
#include "lapacke_utils.h"

/* Converts input symmetric matrix from row-major(C) to column-major(Fortran)
 * layout or vice versa.
 *
 * Only the triangle selected by `uplo` is converted; diag = 'n' tells the
 * triangular converter to copy the diagonal entries too. */
void LAPACKE_dpo_trans( int matrix_order, char uplo, lapack_int n,
                        const double *in, lapack_int ldin,
                        double *out, lapack_int ldout )
{
    LAPACKE_dtr_trans( matrix_order, uplo, 'n', n, in, ldin, out, ldout );
}

================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_dtr_nancheck.c ================================================
/*****************************************************************************
  Copyright (c) 2010, Intel Corp.
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Check a matrix for NaN entries. 
*/ lapack_logical LAPACKE_dtr_nancheck( int matrix_order, char uplo, char diag, lapack_int n, const double *a, lapack_int lda ) { lapack_int i, j, st; lapack_logical colmaj, lower, unit; if( a == NULL ) return (lapack_logical) 0; colmaj = ( matrix_order == LAPACK_COL_MAJOR ); lower = LAPACKE_lsame( uplo, 'l' ); unit = LAPACKE_lsame( diag, 'u' ); if( ( !colmaj && ( matrix_order != LAPACK_ROW_MAJOR ) ) || ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { /* Just exit if any of input parameters are wrong */ return (lapack_logical) 0; } if( unit ) { /* If unit, then don't touch diagonal, start from 1st column or row */ st = 1; } else { /* If non-unit, then check diagonal also, starting from [0,0] */ st = 0; } /* Since col_major upper and row_major lower are equal, * and col_major lower and row_major upper are equals too - * using one code for equal cases. XOR( colmaj, upper ) */ if( ( colmaj || lower ) && !( colmaj && lower ) ) { for( j = st; j < n; j++ ) { for( i = 0; i < MIN( j+1-st, lda ); i++ ) { if( LAPACK_DISNAN( a[i+j*lda] ) ) return (lapack_logical) 1; } } } else { for( j = 0; j < n-st; j++ ) { for( i = j+st; i < MIN( n, lda ); i++ ) { if( LAPACK_DISNAN( a[i+j*lda] ) ) return (lapack_logical) 1; } } } return (lapack_logical) 0; } ================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_dtr_trans.c ================================================ /***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Converts input triangular matrix from row-major(C) to column-major(Fortran) * layout or vice versa. 
*/ void LAPACKE_dtr_trans( int matrix_order, char uplo, char diag, lapack_int n, const double *in, lapack_int ldin, double *out, lapack_int ldout ) { lapack_int i, j, st; lapack_logical colmaj, lower, unit; if( in == NULL || out == NULL ) return ; colmaj = ( matrix_order == LAPACK_COL_MAJOR ); lower = LAPACKE_lsame( uplo, 'l' ); unit = LAPACKE_lsame( diag, 'u' ); if( ( !colmaj && ( matrix_order != LAPACK_ROW_MAJOR ) ) || ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { /* Just exit if any of input parameters are wrong */ return; } if( unit ) { /* If unit, then don't touch diagonal, start from 1st column or row */ st = 1; } else { /* If non-unit, then check diagonal also, starting from [0,0] */ st = 0; } /* Perform conversion: * Since col_major upper and row_major lower are equal, * and col_major lower and row_major upper are equals too - * using one code for equal cases. XOR( colmaj, upper ) */ if( ( colmaj || lower ) && !( colmaj && lower ) ) { for( j = st; j < MIN( n, ldout ); j++ ) { for( i = 0; i < MIN( j+1-st, ldin ); i++ ) { out[ j+i*ldout ] = in[ i+j*ldin ]; } } } else { for( j = 0; j < MIN( n-st, ldout ); j++ ) { for( i = j+st; i < MIN( n, ldin ); i++ ) { out[ j+i*ldout ] = in[ i+j*ldin ]; } } } } ================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_lsame.c ================================================ /***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of Intel Corporation nor the names of its contributors
      may be used to endorse or promote products derived from this software
      without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.
******************************************************************************
* Contents: Native C interface to LAPACK lsame
* Author: Intel Corporation
* Created in January, 2010
*****************************************************************************/
#include "lapacke_utils.h"

/* Compare two characters by delegating to the Fortran LSAME routine
 * (per LAPACK documentation LSAME compares regardless of case — see the
 * LSAME reference).  Takes the characters by value and passes their
 * addresses plus the hidden Fortran length arguments (1, 1). */
lapack_logical LAPACKE_lsame( char ca,  char cb )
{
    return (lapack_logical) LAPACK_lsame( &ca, &cb, 1, 1 );
}

================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_s_nancheck.c ================================================
/*****************************************************************************
  Copyright (c) 2010, Intel Corp.
  All rights reserved.
  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of Intel Corporation nor the names of its contributors
      may be used to endorse or promote products derived from this software
      without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.
******************************************************************************
* Contents: Native C interface to LAPACK utility function
* Author: Intel Corporation
* Created in February, 2010
*****************************************************************************/
#include "lapacke_utils.h"

/* Check a vector for NaN entries.
 *
 * Scans the n elements of `x` taken with stride |incx| (indices 0, inc,
 * 2*inc, ...) and returns 1 if any is NaN, 0 otherwise.  incx == 0 is a
 * special case: only x[0] is examined. */
lapack_logical LAPACKE_s_nancheck( lapack_int n,
                                   const float *x,
                                   lapack_int incx )
{
    lapack_int i, inc;

    if( incx == 0 ) return (lapack_logical) LAPACK_SISNAN( x[0] );

    /* Only the magnitude of the stride matters for the scan */
    inc = ( incx > 0 ) ? incx : -incx ;

    for( i = 0; i < n*inc; i+=inc ) {
        if( LAPACK_SISNAN( x[i] ) )
            return (lapack_logical) 1;
    }
    return (lapack_logical) 0;
}

================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_sge_nancheck.c ================================================
/*****************************************************************************
  Copyright (c) 2010, Intel Corp.
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of Intel Corporation nor the names of its contributors
      may be used to endorse or promote products derived from this software
      without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.
******************************************************************************
* Contents: Native C interface to LAPACK utility function
* Author: Intel Corporation
* Created in February, 2010
*****************************************************************************/
#include "lapacke_utils.h"

/* Check a matrix for NaN entries.
 *
 * Single-precision counterpart of LAPACKE_dge_nancheck: scans the m-by-n
 * general matrix `a` (leading dimension `lda`) in either layout and returns
 * 1 on the first NaN found, 0 otherwise.  NULL `a` or an unrecognized
 * `matrix_order` yields 0. */
lapack_logical LAPACKE_sge_nancheck( int matrix_order, lapack_int m,
                                     lapack_int n,
                                     const float *a,
                                     lapack_int lda )
{
    lapack_int i, j;
    if( a == NULL ) return (lapack_logical) 0;

    if( matrix_order == LAPACK_COL_MAJOR ) {
        for( j = 0; j < n; j++ ) {
            /* MIN( m, lda ) avoids reading past the column when lda < m */
            for( i = 0; i < MIN( m, lda ); i++ ) {
                /* (size_t) cast keeps index arithmetic from overflowing
                 * lapack_int on very large matrices */
                if( LAPACK_SISNAN( a[i+(size_t)j*lda] ) )
                    return (lapack_logical) 1;
            }
        }
    } else if ( matrix_order == LAPACK_ROW_MAJOR ) {
        for( i = 0; i < m; i++ ) {
            for( j = 0; j < MIN( n, lda ); j++ ) {
                if( LAPACK_SISNAN( a[(size_t)i*lda+j] ) )
                    return (lapack_logical) 1;
            }
        }
    }
    return (lapack_logical) 0;
}

================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_sge_trans.c ================================================
/*****************************************************************************
  Copyright (c) 2010, Intel Corp.
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of Intel Corporation nor the names of its contributors
      may be used to endorse or promote products derived from this software
      without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Converts input general matrix from row-major(C) to column-major(Fortran) * layout or vice versa. 
 */
/* Single-precision counterpart of LAPACKE_dge_trans: copies the m-by-n
 * matrix `in` (leading dimension `ldin`) into `out` (leading dimension
 * `ldout`), swapping between row-major and column-major storage.  NULL
 * pointers or an unknown `matrix_order` make the function return without
 * touching `out`. */
void LAPACKE_sge_trans( int matrix_order, lapack_int m, lapack_int n,
                        const float* in, lapack_int ldin,
                        float* out, lapack_int ldout )
{
    lapack_int i, j, x, y;

    if( in == NULL || out == NULL ) return;

    /* x/y are the logical extents as seen through the OUTPUT layout */
    if( matrix_order == LAPACK_COL_MAJOR ) {
        x = n;
        y = m;
    } else if ( matrix_order == LAPACK_ROW_MAJOR ) {
        x = m;
        y = n;
    } else {
        /* Unknown input layout */
        return;
    }

    /* In case of incorrect m, n, ldin or ldout the function does nothing */
    for( i = 0; i < MIN( y, ldin ); i++ ) {
        for( j = 0; j < MIN( x, ldout ); j++ ) {
            out[ (size_t)i*ldout + j ] = in[ (size_t)j*ldin + i ];
        }
    }
}

================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_spo_nancheck.c ================================================
/*****************************************************************************
  Copyright (c) 2010, Intel Corp.
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of Intel Corporation nor the names of its contributors
      may be used to endorse or promote products derived from this software
      without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED.
  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY
  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************
* Contents: Native C interface to LAPACK utility function
* Author: Intel Corporation
* Created in February, 2010
*****************************************************************************/
#include "lapacke_utils.h"

/* Check a matrix for NaN entries.
 *
 * Single-precision counterpart of LAPACKE_dpo_nancheck: checks only the
 * triangle selected by `uplo`; diag = 'n' makes the triangular checker
 * include the diagonal entries as well. */
lapack_logical LAPACKE_spo_nancheck( int matrix_order, char uplo,
                                     lapack_int n,
                                     const float *a,
                                     lapack_int lda )
{
    return LAPACKE_str_nancheck( matrix_order, uplo, 'n', n, a, lda );
}

================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_spo_trans.c ================================================
/*****************************************************************************
  Copyright (c) 2010, Intel Corp.
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of Intel Corporation nor the names of its contributors
      may be used to endorse or promote products derived from this software
      without specific prior written permission.
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.
******************************************************************************
* Contents: Native C interface to LAPACK utility function
* Author: Intel Corporation
* Created in February, 2010
*****************************************************************************/
#include "lapacke_utils.h"

/* Converts input symmetric matrix from row-major(C) to column-major(Fortran)
 * layout or vice versa.
 *
 * Single-precision counterpart of LAPACKE_dpo_trans: only the triangle
 * selected by `uplo` is converted; diag = 'n' tells the triangular
 * converter to copy the diagonal entries too. */
void LAPACKE_spo_trans( int matrix_order, char uplo, lapack_int n,
                        const float *in, lapack_int ldin,
                        float *out, lapack_int ldout )
{
    LAPACKE_str_trans( matrix_order, uplo, 'n', n, in, ldin, out, ldout );
}

================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_str_nancheck.c ================================================
/*****************************************************************************
  Copyright (c) 2010, Intel Corp.
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Check a matrix for NaN entries. 
*/ lapack_logical LAPACKE_str_nancheck( int matrix_order, char uplo, char diag, lapack_int n, const float *a, lapack_int lda ) { lapack_int i, j, st; lapack_logical colmaj, lower, unit; if( a == NULL ) return (lapack_logical) 0; colmaj = ( matrix_order == LAPACK_COL_MAJOR ); lower = LAPACKE_lsame( uplo, 'l' ); unit = LAPACKE_lsame( diag, 'u' ); if( ( !colmaj && ( matrix_order != LAPACK_ROW_MAJOR ) ) || ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { /* Just exit if any of input parameters are wrong */ return (lapack_logical) 0; } if( unit ) { /* If unit, then don't touch diagonal, start from 1st column or row */ st = 1; } else { /* If non-unit, then check diagonal also, starting from [0,0] */ st = 0; } /* Since col_major upper and row_major lower are equal, * and col_major lower and row_major upper are equals too - * using one code for equal cases. XOR( colmaj, upper ) */ if( ( colmaj || lower ) && !( colmaj && lower ) ) { for( j = st; j < n; j++ ) { for( i = 0; i < MIN( j+1-st, lda ); i++ ) { if( LAPACK_SISNAN( a[i+j*lda] ) ) return (lapack_logical) 1; } } } else { for( j = 0; j < n-st; j++ ) { for( i = j+st; i < MIN( n, lda ); i++ ) { if( LAPACK_SISNAN( a[i+j*lda] ) ) return (lapack_logical) 1; } } } return (lapack_logical) 0; } ================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_str_trans.c ================================================ /***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Converts input triangular matrix from row-major(C) to column-major(Fortran) * layout or vice versa. 
*/ void LAPACKE_str_trans( int matrix_order, char uplo, char diag, lapack_int n, const float *in, lapack_int ldin, float *out, lapack_int ldout ) { lapack_int i, j, st; lapack_logical colmaj, lower, unit; if( in == NULL || out == NULL ) return ; colmaj = ( matrix_order == LAPACK_COL_MAJOR ); lower = LAPACKE_lsame( uplo, 'l' ); unit = LAPACKE_lsame( diag, 'u' ); if( ( !colmaj && ( matrix_order != LAPACK_ROW_MAJOR ) ) || ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) || ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) { /* Just exit if any of input parameters are wrong */ return; } if( unit ) { /* If unit, then don't touch diagonal, start from 1st column or row */ st = 1; } else { /* If non-unit, then check diagonal also, starting from [0,0] */ st = 0; } /* Perform conversion: * Since col_major upper and row_major lower are equal, * and col_major lower and row_major upper are equals too - * using one code for equal cases. XOR( colmaj, upper ) */ if( ( colmaj || lower ) && !( colmaj && lower ) ) { for( j = st; j < MIN( n, ldout ); j++ ) { for( i = 0; i < MIN( j+1-st, ldin ); i++ ) { out[ j+i*ldout ] = in[ i+j*ldin ]; } } } else { for( j = 0; j < MIN( n-st, ldout ); j++ ) { for( i = j+st; i < MIN( n, ldin ); i++ ) { out[ j+i*ldout ] = in[ i+j*ldin ]; } } } } ================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_xerbla.c ================================================ /***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
****************************************************************************** * Contents: Native C interface to LAPACK lsame * Author: Intel Corporation * Created in January, 2010 *****************************************************************************/ #include #include "lapacke_utils.h" void LAPACKE_xerbla( const char *name, lapack_int info ) { if( info == LAPACK_WORK_MEMORY_ERROR ) { printf( "Not enough memory to allocate work array in %s\n", name ); } else if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { printf( "Not enough memory to transpose matrix in %s\n", name ); } else if( info < 0 ) { printf( "Wrong parameter %d in %s\n", -(int) info, name ); } } ================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_z_nancheck.c ================================================ /***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Check a vector for NaN entries. */ lapack_logical LAPACKE_z_nancheck( lapack_int n, const lapack_complex_double *x, lapack_int incx ) { lapack_int i, inc; if( incx == 0 ) return (lapack_logical) LAPACK_ZISNAN( x[0] ); inc = ( incx > 0 ) ? incx : -incx ; for( i = 0; i < n*inc; i+=inc ) { if( LAPACK_ZISNAN( x[i] ) ) return (lapack_logical) 1; } return (lapack_logical) 0; } ================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_zge_nancheck.c ================================================ /***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
* Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Check a matrix for NaN entries. 
*/ lapack_logical LAPACKE_zge_nancheck( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double *a, lapack_int lda ) { lapack_int i, j; if( a == NULL ) return (lapack_logical) 0; if( matrix_order == LAPACK_COL_MAJOR ) { for( j = 0; j < n; j++ ) { for( i = 0; i < MIN( m, lda ); i++ ) { if( LAPACK_ZISNAN( a[i+(size_t)j*lda] ) ) return (lapack_logical) 1; } } } else if ( matrix_order == LAPACK_ROW_MAJOR ) { for( i = 0; i < m; i++ ) { for( j = 0; j < MIN( n, lda ); j++ ) { if( LAPACK_ZISNAN( a[(size_t)i*lda+j] ) ) return (lapack_logical) 1; } } } return (lapack_logical) 0; } ================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_zge_trans.c ================================================ /***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Converts input general matrix from row-major(C) to column-major(Fortran) * layout or vice versa. */ void LAPACKE_zge_trans( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double* in, lapack_int ldin, lapack_complex_double* out, lapack_int ldout ) { lapack_int i, j, x, y; if( in == NULL || out == NULL ) return; if( matrix_order == LAPACK_COL_MAJOR ) { x = n; y = m; } else if ( matrix_order == LAPACK_ROW_MAJOR ) { x = m; y = n; } else { /* Unknown input layout */ return; } /* In case of incorrect m, n, ldin or ldout the function does nothing */ for( i = 0; i < MIN( y, ldin ); i++ ) { for( j = 0; j < MIN( x, ldout ); j++ ) { out[ (size_t)i*ldout + j ] = in[ (size_t)j*ldin + i ]; } } } ================================================ FILE: ext/nmatrix_lapacke/lapacke/utils/lapacke_zpo_nancheck.c ================================================ /***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Check a matrix for NaN entries. 
/* Check a complex*16 symmetric/Hermitian (po-storage) matrix for NaN
 * entries.  Delegates to the triangular NaN checker on the uplo
 * triangle, forcing a non-unit diagonal ('n') so the diagonal is
 * inspected as well.  Returns 1 if a NaN is found, 0 otherwise. */
lapack_logical LAPACKE_zpo_nancheck( int matrix_order, char uplo,
                                     lapack_int n,
                                     const lapack_complex_double *a,
                                     lapack_int lda )
{
    return LAPACKE_ztr_nancheck( matrix_order, uplo, 'n', n, a, lda );
}
/* Converts a complex*16 symmetric/Hermitian (po-storage) matrix between
 * row-major (C) and column-major (Fortran) layouts.  Delegates to the
 * triangular converter on the uplo triangle, forcing a non-unit
 * diagonal ('n') so diagonal entries are copied as well. */
void LAPACKE_zpo_trans( int matrix_order, char uplo, lapack_int n,
                        const lapack_complex_double *in, lapack_int ldin,
                        lapack_complex_double *out, lapack_int ldout )
{
    LAPACKE_ztr_trans( matrix_order, uplo, 'n', n, in, ldin, out, ldout );
}
/* Check a complex*16 triangular matrix for NaN entries.
 *
 * matrix_order : LAPACK_COL_MAJOR or LAPACK_ROW_MAJOR storage layout.
 * uplo         : 'u'/'l' — which triangle of a is stored.
 * diag         : 'u' for an implicit unit diagonal (diagonal never
 *                inspected), 'n' to check the diagonal as well.
 * n, lda       : order of the matrix and its leading dimension.
 *
 * Returns 1 as soon as a NaN is found, 0 otherwise.  A NULL pointer or
 * any unrecognized option value also yields 0. */
lapack_logical LAPACKE_ztr_nancheck( int matrix_order, char uplo, char diag,
                                     lapack_int n,
                                     const lapack_complex_double *a,
                                     lapack_int lda )
{
    lapack_int i, j, st;
    lapack_logical colmaj, lower, unit;
    if( a == NULL ) return (lapack_logical) 0;
    colmaj = ( matrix_order == LAPACK_COL_MAJOR );
    lower = LAPACKE_lsame( uplo, 'l' );
    unit = LAPACKE_lsame( diag, 'u' );
    if( ( !colmaj && ( matrix_order != LAPACK_ROW_MAJOR ) ) ||
        ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) ||
        ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) {
        /* Just exit if any of input parameters are wrong */
        return (lapack_logical) 0;
    }
    if( unit ) {
        /* If unit, then don't touch diagonal, start from 1st column or row */
        st = 1;
    } else {
        /* If non-unit, then check diagonal also, starting from [0,0] */
        st = 0;
    }
    /* Since col_major upper and row_major lower are equal,
     * and col_major lower and row_major upper are equals too -
     * using one code for equal cases. XOR( colmaj, upper ) */
    if( ( colmaj || lower ) && !( colmaj && lower ) ) {
        /* Exactly one of colmaj/lower set: within stride-column j only
         * entries i <= j-st are stored (upper-triangle memory pattern). */
        for( j = st; j < n; j++ ) {
            for( i = 0; i < MIN( j+1-st, lda ); i++ ) {
                if( LAPACK_ZISNAN( a[i+j*lda] ) )
                    return (lapack_logical) 1;
            }
        }
    } else {
        /* Both or neither set: entries i >= j+st are stored
         * (lower-triangle memory pattern). */
        for( j = 0; j < n-st; j++ ) {
            for( i = j+st; i < MIN( n, lda ); i++ ) {
                if( LAPACK_ZISNAN( a[i+j*lda] ) )
                    return (lapack_logical) 1;
            }
        }
    }
    return (lapack_logical) 0;
}
/* Converts a complex*16 triangular matrix between row-major (C) and
 * column-major (Fortran) layouts.
 *
 * matrix_order : layout of the INPUT matrix; the output gets the other.
 * uplo         : 'u'/'l' — which triangle is stored and copied.
 * diag         : 'u' for an implicit unit diagonal (diagonal entries
 *                are not copied), 'n' to copy the diagonal too.
 * n            : order of the matrix.
 * ldin, ldout  : leading dimensions of in and out respectively.
 *
 * NULL pointers or any unrecognized option value make this a no-op. */
void LAPACKE_ztr_trans( int matrix_order, char uplo, char diag, lapack_int n,
                        const lapack_complex_double *in, lapack_int ldin,
                        lapack_complex_double *out, lapack_int ldout )
{
    lapack_int i, j, st;
    lapack_logical colmaj, lower, unit;
    if( in == NULL || out == NULL ) return ;
    colmaj = ( matrix_order == LAPACK_COL_MAJOR );
    lower = LAPACKE_lsame( uplo, 'l' );
    unit = LAPACKE_lsame( diag, 'u' );
    if( ( !colmaj && ( matrix_order != LAPACK_ROW_MAJOR ) ) ||
        ( !lower && !LAPACKE_lsame( uplo, 'u' ) ) ||
        ( !unit && !LAPACKE_lsame( diag, 'n' ) ) ) {
        /* Just exit if any of input parameters are wrong */
        return;
    }
    if( unit ) {
        /* If unit, then don't touch diagonal, start from 1st column or row */
        st = 1;
    } else {
        /* If non-unit, then check diagonal also, starting from [0,0] */
        st = 0;
    }
    /* Perform conversion:
     * Since col_major upper and row_major lower are equal,
     * and col_major lower and row_major upper are equals too -
     * using one code for equal cases. XOR( colmaj, upper ) */
    if( ( colmaj || lower ) && !( colmaj && lower ) ) {
        /* Upper-triangle memory pattern: per stride-column j, copy
         * entries i <= j-st, transposing the index roles on output. */
        for( j = st; j < MIN( n, ldout ); j++ ) {
            for( i = 0; i < MIN( j+1-st, ldin ); i++ ) {
                out[ j+i*ldout ] = in[ i+j*ldin ];
            }
        }
    } else {
        /* Lower-triangle memory pattern: entries i >= j+st. */
        for( j = 0; j < MIN( n-st, ldout ); j++ ) {
            for( i = j+st; i < MIN( n, ldin ); i++ ) {
                out[ j+i*ldout ] = in[ i+j*ldin ];
            }
        }
    }
}
"lapacke/src/lapacke_dgeqrf_work.c" #include "lapacke/src/lapacke_dpotri.c" #include "lapacke/src/lapacke_dpotri_work.c" #include "lapacke/src/lapacke_spotrs.c" #include "lapacke/src/lapacke_cgesvd_work.c" #include "lapacke/src/lapacke_cpotrs.c" #include "lapacke/src/lapacke_sormqr_work.c" #include "lapacke/src/lapacke_zunmqr_work.c" #include "lapacke/src/lapacke_sgetrs_work.c" #include "lapacke/src/lapacke_cgeev_work.c" #include "lapacke/src/lapacke_zpotrf_work.c" #include "lapacke/src/lapacke_zgeqrf.c" #include "lapacke/src/lapacke_sgesvd_work.c" #include "lapacke/src/lapacke_spotrf.c" #include "lapacke/src/lapacke_cunmqr_work.c" #include "lapacke/src/lapacke_cpotrf_work.c" #include "lapacke/src/lapacke_dgetri.c" #include "lapacke/src/lapacke_cgeqrf_work.c" #include "lapacke/src/lapacke_sgeqrf_work.c" #include "lapacke/src/lapacke_zpotri.c" #include "lapacke/src/lapacke_dgetrs.c" #include "lapacke/src/lapacke_zgesdd.c" #include "lapacke/src/lapacke_zpotri_work.c" #include "lapacke/src/lapacke_sgeev.c" #include "lapacke/src/lapacke_dgesvd.c" #include "lapacke/src/lapacke_zpotrf.c" #include "lapacke/src/lapacke_cgeev.c" #include "lapacke/src/lapacke_spotri_work.c" #include "lapacke/src/lapacke_zgetrf.c" #include "lapacke/src/lapacke_dgetrs_work.c" #include "lapacke/src/lapacke_cgesdd_work.c" #include "lapacke/src/lapacke_spotrs_work.c" #include "lapacke/src/lapacke_cpotrs_work.c" #include "lapacke/src/lapacke_cgetrs.c" #include "lapacke/src/lapacke_sgeqrf.c" #include "lapacke/src/lapacke_sgesdd.c" #include "lapacke/src/lapacke_sgesdd_work.c" #include "lapacke/src/lapacke_zgetrs_work.c" #include "lapacke/src/lapacke_sgetri.c" #include "lapacke/src/lapacke_spotri.c" #include "lapacke/src/lapacke_dpotrf.c" #include "lapacke/src/lapacke_cgetrs_work.c" #include "lapacke/src/lapacke_zgetrf_work.c" #include "lapacke/src/lapacke_sgetrf_work.c" #include "lapacke/src/lapacke_dgesvd_work.c" #include "lapacke/src/lapacke_dgesdd.c" #include "lapacke/src/lapacke_cgetri_work.c" 
#include "lapacke/src/lapacke_zpotrs.c" #include "lapacke/src/lapacke_zgesvd_work.c" #include "lapacke/src/lapacke_dpotrs_work.c" #include "lapacke/src/lapacke_dormqr.c" #include "lapacke/src/lapacke_dpotrs.c" #include "lapacke/src/lapacke_sgetrf.c" #include "lapacke/src/lapacke_dpotrf_work.c" #include "lapacke/src/lapacke_sgetrs.c" #include "lapacke/utils/lapacke_sge_nancheck.c" #include "lapacke/utils/lapacke_zge_trans.c" #include "lapacke/utils/lapacke_dpo_trans.c" #include "lapacke/utils/lapacke_cpo_trans.c" #include "lapacke/utils/lapacke_cge_trans.c" #include "lapacke/utils/lapacke_dge_nancheck.c" #include "lapacke/utils/lapacke_cpo_nancheck.c" #include "lapacke/utils/lapacke_c_nancheck.c" #include "lapacke/utils/lapacke_lsame.c" #include "lapacke/utils/lapacke_str_nancheck.c" #include "lapacke/utils/lapacke_zpo_trans.c" #include "lapacke/utils/lapacke_str_trans.c" #include "lapacke/utils/lapacke_ztr_nancheck.c" #include "lapacke/utils/lapacke_cge_nancheck.c" #include "lapacke/utils/lapacke_d_nancheck.c" #include "lapacke/utils/lapacke_ctr_trans.c" #include "lapacke/utils/lapacke_dge_trans.c" #include "lapacke/utils/lapacke_sge_trans.c" #include "lapacke/utils/lapacke_zge_nancheck.c" #include "lapacke/utils/lapacke_dtr_nancheck.c" #include "lapacke/utils/lapacke_s_nancheck.c" #include "lapacke/utils/lapacke_spo_trans.c" #include "lapacke/utils/lapacke_dtr_trans.c" #include "lapacke/utils/lapacke_xerbla.c" #include "lapacke/utils/lapacke_ctr_nancheck.c" #include "lapacke/utils/lapacke_ztr_trans.c" #include "lapacke/utils/lapacke_z_nancheck.c" #include "lapacke/utils/lapacke_dpo_nancheck.c" #include "lapacke/utils/lapacke_zpo_nancheck.c" #include "lapacke/utils/lapacke_spo_nancheck.c" ================================================ FILE: ext/nmatrix_lapacke/lapacke_nmatrix.h ================================================ //need to define a few things before including the real lapacke.h #include "data/data.h" //needed because this is where our complex types 
# We want this to be a C++ file since our complex types require C++.
# Regenerates lapacke.cpp as a single translation unit that pulls in
# every C source under lapacke/.
header = [
  "//This file is auto-generated by make_lapacke_cpp.rb",
  "//It includes all source files in the lapacke/ subdirectory"
]
includes = Dir["lapacke/**/*.c"].map { |src| %Q{#include "#{src}"} }

File.open("lapacke.cpp", "w") do |out|
  (header + includes).each { |line| out.puts line }
end
*/ #ifdef __cplusplus extern "C" { #endif /* * Enumerated and derived types */ #define CBLAS_INDEX size_t /* this may vary between platforms */ //Remove enums from this file so we can use them in code that doesn't rely on CBLAS #include "math/cblas_enums.h" /* * =========================================================================== * Prototypes for level 1 BLAS functions (complex are recast as routines) * =========================================================================== */ float cblas_sdsdot(const int N, const float alpha, const float *X, const int incX, const float *Y, const int incY); double cblas_dsdot(const int N, const float *X, const int incX, const float *Y, const int incY); float cblas_sdot(const int N, const float *X, const int incX, const float *Y, const int incY); double cblas_ddot(const int N, const double *X, const int incX, const double *Y, const int incY); /* * Functions having prefixes Z and C only */ void cblas_cdotu_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotu); void cblas_cdotc_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotc); void cblas_zdotu_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotu); void cblas_zdotc_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotc); /* * Functions having prefixes S D SC DZ */ float cblas_snrm2(const int N, const float *X, const int incX); float cblas_sasum(const int N, const float *X, const int incX); double cblas_dnrm2(const int N, const double *X, const int incX); double cblas_dasum(const int N, const double *X, const int incX); float cblas_scnrm2(const int N, const void *X, const int incX); float cblas_scasum(const int N, const void *X, const int incX); double cblas_dznrm2(const int N, const void *X, const int incX); double cblas_dzasum(const int N, const void *X, const int incX); /* * Functions having standard 4 prefixes (S D C Z) */ 
CBLAS_INDEX cblas_isamax(const int N, const float *X, const int incX); CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX); CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX); CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX); /* * =========================================================================== * Prototypes for level 1 BLAS routines * =========================================================================== */ /* * Routines with standard 4 prefixes (s, d, c, z) */ void cblas_sswap(const int N, float *X, const int incX, float *Y, const int incY); void cblas_scopy(const int N, const float *X, const int incX, float *Y, const int incY); void cblas_saxpy(const int N, const float alpha, const float *X, const int incX, float *Y, const int incY); void cblas_dswap(const int N, double *X, const int incX, double *Y, const int incY); void cblas_dcopy(const int N, const double *X, const int incX, double *Y, const int incY); void cblas_daxpy(const int N, const double alpha, const double *X, const int incX, double *Y, const int incY); void cblas_cswap(const int N, void *X, const int incX, void *Y, const int incY); void cblas_ccopy(const int N, const void *X, const int incX, void *Y, const int incY); void cblas_caxpy(const int N, const void *alpha, const void *X, const int incX, void *Y, const int incY); void cblas_zswap(const int N, void *X, const int incX, void *Y, const int incY); void cblas_zcopy(const int N, const void *X, const int incX, void *Y, const int incY); void cblas_zaxpy(const int N, const void *alpha, const void *X, const int incX, void *Y, const int incY); /* * Routines with S and D prefix only */ void cblas_srotg(float *a, float *b, float *c, float *s); void cblas_srotmg(float *d1, float *d2, float *b1, const float b2, float *P); void cblas_srot(const int N, float *X, const int incX, float *Y, const int incY, const float c, const float s); void cblas_srotm(const int N, float *X, const int incX, 
float *Y, const int incY, const float *P); void cblas_drotg(double *a, double *b, double *c, double *s); void cblas_drotmg(double *d1, double *d2, double *b1, const double b2, double *P); void cblas_drot(const int N, double *X, const int incX, double *Y, const int incY, const double c, const double s); void cblas_drotm(const int N, double *X, const int incX, double *Y, const int incY, const double *P); /* * Routines with S D C Z CS and ZD prefixes */ void cblas_sscal(const int N, const float alpha, float *X, const int incX); void cblas_dscal(const int N, const double alpha, double *X, const int incX); void cblas_cscal(const int N, const void *alpha, void *X, const int incX); void cblas_zscal(const int N, const void *alpha, void *X, const int incX); void cblas_csscal(const int N, const float alpha, void *X, const int incX); void cblas_zdscal(const int N, const double alpha, void *X, const int incX); /* * =========================================================================== * Prototypes for level 2 BLAS * =========================================================================== */ /* * Routines with standard 4 prefixes (S, D, C, Z) */ void cblas_sgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const float alpha, const float *A, const int lda, const float *X, const int incX, const float beta, float *Y, const int incY); void cblas_sgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, const float alpha, const float *A, const int lda, const float *X, const int incX, const float beta, float *Y, const int incY); void cblas_strmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const float *A, const int lda, float *X, const int incX); void cblas_stbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG 
Diag, const int N, const int K, const float *A, const int lda, float *X, const int incX); void cblas_stpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const float *Ap, float *X, const int incX); void cblas_strsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const float *A, const int lda, float *X, const int incX); void cblas_stbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, const float *A, const int lda, float *X, const int incX); void cblas_stpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const float *Ap, float *X, const int incX); void cblas_dgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const double alpha, const double *A, const int lda, const double *X, const int incX, const double beta, double *Y, const int incY); void cblas_dgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, const double alpha, const double *A, const int lda, const double *X, const int incX, const double beta, double *Y, const int incY); void cblas_dtrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const double *A, const int lda, double *X, const int incX); void cblas_dtbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, const double *A, const int lda, double *X, const int incX); void cblas_dtpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const 
int N, const double *Ap, double *X, const int incX); void cblas_dtrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const double *A, const int lda, double *X, const int incX); void cblas_dtbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, const double *A, const int lda, double *X, const int incX); void cblas_dtpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const double *Ap, double *X, const int incX); void cblas_cgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY); void cblas_cgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY); void cblas_ctrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const void *A, const int lda, void *X, const int incX); void cblas_ctbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, const void *A, const int lda, void *X, const int incX); void cblas_ctpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const void *Ap, void *X, const int incX); void cblas_ctrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const void *A, const int lda, void *X, const int 
incX); void cblas_ctbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, const void *A, const int lda, void *X, const int incX); void cblas_ctpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const void *Ap, void *X, const int incX); void cblas_zgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY); void cblas_zgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY); void cblas_ztrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const void *A, const int lda, void *X, const int incX); void cblas_ztbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, const void *A, const int lda, void *X, const int incX); void cblas_ztpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const void *Ap, void *X, const int incX); void cblas_ztrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const void *A, const int lda, void *X, const int incX); void cblas_ztbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, const void *A, const int lda, void *X, const int incX); void cblas_ztpsv(const enum CBLAS_ORDER 
order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const void *Ap, void *X, const int incX); /* * Routines with S and D prefixes only */ void cblas_ssymv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, const float *A, const int lda, const float *X, const int incX, const float beta, float *Y, const int incY); void cblas_ssbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const int K, const float alpha, const float *A, const int lda, const float *X, const int incX, const float beta, float *Y, const int incY); void cblas_sspmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, const float *Ap, const float *X, const int incX, const float beta, float *Y, const int incY); void cblas_sger(const enum CBLAS_ORDER order, const int M, const int N, const float alpha, const float *X, const int incX, const float *Y, const int incY, float *A, const int lda); void cblas_ssyr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, const float *X, const int incX, float *A, const int lda); void cblas_sspr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, const float *X, const int incX, float *Ap); void cblas_ssyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, const float *X, const int incX, const float *Y, const int incY, float *A, const int lda); void cblas_sspr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, const float *X, const int incX, const float *Y, const int incY, float *A); void cblas_dsymv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, const double *A, const int lda, const double *X, const int incX, const double beta, double *Y, const int incY); void cblas_dsbmv(const enum CBLAS_ORDER order, const enum 
CBLAS_UPLO Uplo, const int N, const int K, const double alpha, const double *A, const int lda, const double *X, const int incX, const double beta, double *Y, const int incY); void cblas_dspmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, const double *Ap, const double *X, const int incX, const double beta, double *Y, const int incY); void cblas_dger(const enum CBLAS_ORDER order, const int M, const int N, const double alpha, const double *X, const int incX, const double *Y, const int incY, double *A, const int lda); void cblas_dsyr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, const double *X, const int incX, double *A, const int lda); void cblas_dspr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, const double *X, const int incX, double *Ap); void cblas_dsyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, const double *X, const int incX, const double *Y, const int incY, double *A, const int lda); void cblas_dspr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, const double *X, const int incX, const double *Y, const int incY, double *A); /* * Routines with C and Z prefixes only */ void cblas_chemv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY); void cblas_chbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const int K, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY); void cblas_chpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const void *alpha, const void *Ap, const void *X, const int incX, const void *beta, void *Y, const int incY); void cblas_cgeru(const enum CBLAS_ORDER order, 
const int M, const int N, const void *alpha, const void *X, const int incX, const void *Y, const int incY, void *A, const int lda); void cblas_cgerc(const enum CBLAS_ORDER order, const int M, const int N, const void *alpha, const void *X, const int incX, const void *Y, const int incY, void *A, const int lda); void cblas_cher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, const void *X, const int incX, void *A, const int lda); void cblas_chpr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, const void *X, const int incX, void *A); void cblas_cher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const void *alpha, const void *X, const int incX, const void *Y, const int incY, void *A, const int lda); void cblas_chpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const void *alpha, const void *X, const int incX, const void *Y, const int incY, void *Ap); void cblas_zhemv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY); void cblas_zhbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const int K, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY); void cblas_zhpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const void *alpha, const void *Ap, const void *X, const int incX, const void *beta, void *Y, const int incY); void cblas_zgeru(const enum CBLAS_ORDER order, const int M, const int N, const void *alpha, const void *X, const int incX, const void *Y, const int incY, void *A, const int lda); void cblas_zgerc(const enum CBLAS_ORDER order, const int M, const int N, const void *alpha, const void *X, const int incX, const void *Y, const int incY, void *A, const int lda); void 
cblas_zher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, const void *X, const int incX, void *A, const int lda); void cblas_zhpr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, const void *X, const int incX, void *A); void cblas_zher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const void *alpha, const void *X, const int incX, const void *Y, const int incY, void *A, const int lda); void cblas_zhpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const void *alpha, const void *X, const int incX, const void *Y, const int incY, void *Ap); /* * =========================================================================== * Prototypes for level 3 BLAS * =========================================================================== */ /* * Routines with standard 4 prefixes (S, D, C, Z) */ void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc); void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc); void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const float *A, const int lda, const float beta, float *C, const int ldc); void cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc); void cblas_strmm(const enum CBLAS_ORDER Order, 
const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, float *B, const int ldb); void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, float *B, const int ldb); void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc); void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc); void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double beta, double *C, const int ldc); void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc); void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb); void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, 
const double alpha, const double *A, const int lda, double *B, const int ldb); void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc); void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc); void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *beta, void *C, const int ldc); void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc); void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb); void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb); void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc); void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO 
Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc); void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *beta, void *C, const int ldc); void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc); void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb); void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb); /* * Routines with prefixes C and Z only */ void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc); void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const void *A, const int lda, const float beta, void *C, const int ldc); void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const float beta, void *C, const int ldc); void cblas_zhemm(const enum CBLAS_ORDER Order, 
const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc); void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const void *A, const int lda, const double beta, void *C, const int ldc); void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const double beta, void *C, const int ldc); void cblas_xerbla(int p, const char *rout, const char *form, ...); #ifdef __cplusplus } #endif #endif ================================================ FILE: ext/nmatrix_lapacke/math_lapacke/cblas_templates_lapacke.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == cblas_templaces_lapacke.h // // Define template functions for calling CBLAS functions in the // nm::math::lapacke namespace. 
// #ifndef CBLAS_TEMPLATES_LAPACK_H #define CBLAS_TEMPLATES_LAPACK_H //includes so we have access to internal implementations #include "math/rotg.h" #include "math/rot.h" #include "math/asum.h" #include "math/nrm2.h" #include "math/imax.h" #include "math/scal.h" #include "math/gemv.h" #include "math/gemm.h" #include "math/trsm.h" namespace nm { namespace math { namespace lapacke { //Add cblas templates in the correct namespace #include "math/cblas_templates_core.h" }}} #endif ================================================ FILE: ext/nmatrix_lapacke/math_lapacke/lapacke_templates.h ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == lapacke_templates.h // // Templated functions for calling LAPACKE functions directly. // #ifndef LAPACKE_TEMPLATES_H #define LAPACKE_TEMPLATES_H namespace nm { namespace math { namespace lapacke { //getrf template inline int getrf(const enum CBLAS_ORDER order, const int m, const int n, DType* a, const int lda, int* ipiv) { //We don't want to call the internal implementation since the the CLAPACK interface is slightly different than the LAPACKE. rb_raise(rb_eNotImpError, "lapacke_getrf not implemented for non_BLAS dtypes. 
Try clapack_getrf instead."); return 0; } template <> inline int getrf(const enum CBLAS_ORDER order, const int m, const int n, float* a, const int lda, int* ipiv) { return LAPACKE_sgetrf(order, m, n, a, lda, ipiv); } template <> inline int getrf(const enum CBLAS_ORDER order, const int m, const int n, double* a, const int lda, int* ipiv) { return LAPACKE_dgetrf(order, m, n, a, lda, ipiv); } template <> inline int getrf(const enum CBLAS_ORDER order, const int m, const int n, Complex64* a, const int lda, int* ipiv) { return LAPACKE_cgetrf(order, m, n, a, lda, ipiv); } template <> inline int getrf(const enum CBLAS_ORDER order, const int m, const int n, Complex128* a, const int lda, int* ipiv) { return LAPACKE_zgetrf(order, m, n, a, lda, ipiv); } template inline int lapacke_getrf(const enum CBLAS_ORDER order, const int m, const int n, void* a, const int lda, int* ipiv) { return getrf(order, m, n, static_cast(a), lda, ipiv); } //geqrf template inline int geqrf(const enum CBLAS_ORDER order, const int m, const int n, DType* a, const int lda, DType* tau) { rb_raise(rb_eNotImpError, "lapacke_geqrf not implemented for non_BLAS dtypes."); return 0; } template <> inline int geqrf(const enum CBLAS_ORDER order, const int m, const int n, float* a, const int lda, float* tau) { return LAPACKE_sgeqrf(order, m, n, a, lda, tau); } template < > inline int geqrf(const enum CBLAS_ORDER order, const int m, const int n, double* a, const int lda, double* tau) { return LAPACKE_dgeqrf(order, m, n, a, lda, tau); } template <> inline int geqrf(const enum CBLAS_ORDER order, const int m, const int n, Complex64* a, const int lda, Complex64* tau) { return LAPACKE_cgeqrf(order, m, n, a, lda, tau); } template <> inline int geqrf(const enum CBLAS_ORDER order, const int m, const int n, Complex128* a, const int lda, Complex128* tau) { return LAPACKE_zgeqrf(order, m, n, a, lda, tau); } template inline int lapacke_geqrf(const enum CBLAS_ORDER order, const int m, const int n, void* a, const int lda, void* 
tau) { return geqrf(order, m, n, static_cast(a), lda, static_cast(tau)); } //ormqr template inline int ormqr(const enum CBLAS_ORDER order, char side, char trans, const int m, const int n, const int k, DType* a, const int lda, DType* tau, DType* c, const int ldc) { rb_raise(rb_eNotImpError, "lapacke_ormqr not implemented for non_BLAS dtypes."); return 0; } template <> inline int ormqr(const enum CBLAS_ORDER order, char side, char trans, const int m, const int n, const int k, float* a, const int lda, float* tau, float* c, const int ldc) { return LAPACKE_sormqr(order, side, trans, m, n, k, a, lda, tau, c, ldc); } template <> inline int ormqr(const enum CBLAS_ORDER order, char side, char trans, const int m, const int n, const int k, double* a, const int lda, double* tau, double* c, const int ldc) { return LAPACKE_dormqr(order, side, trans, m, n, k, a, lda, tau, c, ldc); } template inline int lapacke_ormqr(const enum CBLAS_ORDER order, char side, char trans, const int m, const int n, const int k, void* a, const int lda, void* tau, void* c, const int ldc) { return ormqr(order, side, trans, m, n, k, static_cast(a), lda, static_cast(tau), static_cast(c), ldc); } //unmqr template inline int unmqr(const enum CBLAS_ORDER order, char side, char trans, const int m, const int n, const int k, DType* a, const int lda, DType* tau, DType* c, const int ldc) { rb_raise(rb_eNotImpError, "lapacke_unmqr not implemented for non complex dtypes."); return 0; } template <> inline int unmqr(const enum CBLAS_ORDER order, char side, char trans, const int m, const int n, const int k, Complex64* a, const int lda, Complex64* tau, Complex64* c, const int ldc) { return LAPACKE_cunmqr(order, side, trans, m, n, k, a, lda, tau, c, ldc); } template <> inline int unmqr(const enum CBLAS_ORDER order, char side, char trans, const int m, const int n, const int k, Complex128* a, const int lda, Complex128* tau, Complex128* c, const int ldc) { return LAPACKE_zunmqr(order, side, trans, m, n, k, a, lda, tau, c, 
ldc); } template inline int lapacke_unmqr(const enum CBLAS_ORDER order, char side, char trans, const int m, const int n, const int k, void* a, const int lda, void* tau, void* c, const int ldc) { return unmqr(order, side, trans, m, n, k, static_cast(a), lda, static_cast(tau), static_cast(c), ldc); } //getri template inline int getri(const enum CBLAS_ORDER order, const int n, DType* a, const int lda, const int* ipiv) { rb_raise(rb_eNotImpError, "getri not yet implemented for non-BLAS dtypes"); return 0; } template <> inline int getri(const enum CBLAS_ORDER order, const int n, float* a, const int lda, const int* ipiv) { return LAPACKE_sgetri(order, n, a, lda, ipiv); } template <> inline int getri(const enum CBLAS_ORDER order, const int n, double* a, const int lda, const int* ipiv) { return LAPACKE_dgetri(order, n, a, lda, ipiv); } template <> inline int getri(const enum CBLAS_ORDER order, const int n, Complex64* a, const int lda, const int* ipiv) { return LAPACKE_cgetri(order, n, a, lda, ipiv); } template <> inline int getri(const enum CBLAS_ORDER order, const int n, Complex128* a, const int lda, const int* ipiv) { return LAPACKE_zgetri(order, n, a, lda, ipiv); } template inline int lapacke_getri(const enum CBLAS_ORDER order, const int n, void* a, const int lda, const int* ipiv) { return getri(order, n, static_cast(a), lda, ipiv); } //getrs template inline int getrs(const enum CBLAS_ORDER Order, char Trans, const int N, const int NRHS, const DType* A, const int lda, const int* ipiv, DType* B, const int ldb) { rb_raise(rb_eNotImpError, "lapacke_getrs not implemented for non_BLAS dtypes. 
Try clapack_getrs instead."); return 0; } template <> inline int getrs(const enum CBLAS_ORDER Order, char Trans, const int N, const int NRHS, const float* A, const int lda, const int* ipiv, float* B, const int ldb) { return LAPACKE_sgetrs(Order, Trans, N, NRHS, A, lda, ipiv, B, ldb); } template <> inline int getrs(const enum CBLAS_ORDER Order, char Trans, const int N, const int NRHS, const double* A, const int lda, const int* ipiv, double* B, const int ldb) { return LAPACKE_dgetrs(Order, Trans, N, NRHS, A, lda, ipiv, B, ldb); } template <> inline int getrs(const enum CBLAS_ORDER Order, char Trans, const int N, const int NRHS, const Complex64* A, const int lda, const int* ipiv, Complex64* B, const int ldb) { return LAPACKE_cgetrs(Order, Trans, N, NRHS, A, lda, ipiv, B, ldb); } template <> inline int getrs(const enum CBLAS_ORDER Order, char Trans, const int N, const int NRHS, const Complex128* A, const int lda, const int* ipiv, Complex128* B, const int ldb) { return LAPACKE_zgetrs(Order, Trans, N, NRHS, A, lda, ipiv, B, ldb); } template inline int lapacke_getrs(const enum CBLAS_ORDER order, char trans, const int n, const int nrhs, const void* a, const int lda, const int* ipiv, void* b, const int ldb) { return getrs(order, trans, n, nrhs, static_cast(a), lda, ipiv, static_cast(b), ldb); } //potrf template inline int potrf(const enum CBLAS_ORDER order, char uplo, const int N, DType* A, const int lda) { rb_raise(rb_eNotImpError, "not implemented for non-BLAS dtypes"); return 0; } template <> inline int potrf(const enum CBLAS_ORDER order, char uplo, const int N, float* A, const int lda) { return LAPACKE_spotrf(order, uplo, N, A, lda); } template <> inline int potrf(const enum CBLAS_ORDER order, char uplo, const int N, double* A, const int lda) { return LAPACKE_dpotrf(order, uplo, N, A, lda); } template <> inline int potrf(const enum CBLAS_ORDER order, char uplo, const int N, Complex64* A, const int lda) { return LAPACKE_cpotrf(order, uplo, N, A, lda); } template <> 
inline int potrf(const enum CBLAS_ORDER order, char uplo, const int N, Complex128* A, const int lda) { return LAPACKE_zpotrf(order, uplo, N, A, lda); } template inline int lapacke_potrf(const enum CBLAS_ORDER order, char uplo, const int n, void* a, const int lda) { return potrf(order, uplo, n, static_cast(a), lda); } //potrs template inline int potrs(const enum CBLAS_ORDER Order, char Uplo, const int N, const int NRHS, const DType* A, const int lda, DType* B, const int ldb) { rb_raise(rb_eNotImpError, "not implemented for non-BLAS dtypes"); return 0; } template <> inline int potrs (const enum CBLAS_ORDER Order, char Uplo, const int N, const int NRHS, const float* A, const int lda, float* B, const int ldb) { return LAPACKE_spotrs(Order, Uplo, N, NRHS, A, lda, B, ldb); } template <> inline int potrs(const enum CBLAS_ORDER Order, char Uplo, const int N, const int NRHS, const double* A, const int lda, double* B, const int ldb) { return LAPACKE_dpotrs(Order, Uplo, N, NRHS, A, lda, B, ldb); } template <> inline int potrs(const enum CBLAS_ORDER Order, char Uplo, const int N, const int NRHS, const Complex64* A, const int lda, Complex64* B, const int ldb) { return LAPACKE_cpotrs(Order, Uplo, N, NRHS, A, lda, B, ldb); } template <> inline int potrs(const enum CBLAS_ORDER Order, char Uplo, const int N, const int NRHS, const Complex128* A, const int lda, Complex128* B, const int ldb) { return LAPACKE_zpotrs(Order, Uplo, N, NRHS, A, lda, B, ldb); } template inline int lapacke_potrs(const enum CBLAS_ORDER order, char uplo, const int n, const int nrhs, const void* a, const int lda, void* b, const int ldb) { return potrs(order, uplo, n, nrhs, static_cast(a), lda, static_cast(b), ldb); } //potri template inline int potri(const enum CBLAS_ORDER order, char uplo, const int n, DType* a, const int lda) { rb_raise(rb_eNotImpError, "potri not yet implemented for non-BLAS dtypes"); return 0; } template <> inline int potri(const enum CBLAS_ORDER order, char uplo, const int n, float* a, 
const int lda) { return LAPACKE_spotri(order, uplo, n, a, lda); } template <> inline int potri(const enum CBLAS_ORDER order, char uplo, const int n, double* a, const int lda) { return LAPACKE_dpotri(order, uplo, n, a, lda); } template <> inline int potri(const enum CBLAS_ORDER order, char uplo, const int n, Complex64* a, const int lda) { return LAPACKE_cpotri(order, uplo, n, a, lda); } template <> inline int potri(const enum CBLAS_ORDER order, char uplo, const int n, Complex128* a, const int lda) { return LAPACKE_zpotri(order, uplo, n, a, lda); } template inline int lapacke_potri(const enum CBLAS_ORDER order, char uplo, const int n, void* a, const int lda) { return potri(order, uplo, n, static_cast(a), lda); } //gesvd template inline int gesvd(int matrix_layout, char jobu, char jobvt, int m, int n, DType* a, int lda, CType* s, DType* u, int ldu, DType* vt, int ldvt, CType* superb) { rb_raise(rb_eNotImpError, "gesvd not yet implemented for non-BLAS dtypes"); return 0; } template <> inline int gesvd(int matrix_layout, char jobu, char jobvt, int m, int n, float* a, int lda, float* s, float* u, int ldu, float* vt, int ldvt, float* superb) { return LAPACKE_sgesvd(matrix_layout, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, superb); } template <> inline int gesvd(int matrix_layout, char jobu, char jobvt, int m, int n, double* a, int lda, double* s, double* u, int ldu, double* vt, int ldvt, double* superb) { return LAPACKE_dgesvd(matrix_layout, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, superb); } template <> inline int gesvd(int matrix_layout, char jobu, char jobvt, int m, int n, nm::Complex64* a, int lda, float* s, nm::Complex64* u, int ldu, nm::Complex64* vt, int ldvt, float* superb) { return LAPACKE_cgesvd(matrix_layout, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, superb); } template <> inline int gesvd(int matrix_layout, char jobu, char jobvt, int m, int n, nm::Complex128* a, int lda, double* s, nm::Complex128* u, int ldu, nm::Complex128* vt, int ldvt, 
double* superb) { return LAPACKE_zgesvd(matrix_layout, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, superb); } template inline int lapacke_gesvd(int matrix_layout, char jobu, char jobvt, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt, void* superb) { return gesvd(matrix_layout, jobu, jobvt, m, n, static_cast(a), lda, static_cast(s), static_cast(u), ldu, static_cast(vt), ldvt, static_cast(superb)); } //gesdd template inline int gesdd(int matrix_layout, char jobz, int m, int n, DType* a, int lda, CType* s, DType* u, int ldu, DType* vt, int ldvt) { rb_raise(rb_eNotImpError, "gesdd not yet implemented for non-BLAS dtypes"); return 0; } template <> inline int gesdd(int matrix_layout, char jobz, int m, int n, float* a, int lda, float* s, float* u, int ldu, float* vt, int ldvt) { return LAPACKE_sgesdd(matrix_layout, jobz, m, n, a, lda, s, u, ldu, vt, ldvt); } template <> inline int gesdd(int matrix_layout, char jobz, int m, int n, double* a, int lda, double* s, double* u, int ldu, double* vt, int ldvt) { return LAPACKE_dgesdd(matrix_layout, jobz, m, n, a, lda, s, u, ldu, vt, ldvt); } template <> inline int gesdd(int matrix_layout, char jobz, int m, int n, nm::Complex64* a, int lda, float* s, nm::Complex64* u, int ldu, nm::Complex64* vt, int ldvt) { return LAPACKE_cgesdd(matrix_layout, jobz, m, n, a, lda, s, u, ldu, vt, ldvt); } template <> inline int gesdd(int matrix_layout, char jobz, int m, int n, nm::Complex128* a, int lda, double* s, nm::Complex128* u, int ldu, nm::Complex128* vt, int ldvt) { return LAPACKE_zgesdd(matrix_layout, jobz, m, n, a, lda, s, u, ldu, vt, ldvt); } template inline int lapacke_gesdd(int matrix_layout, char jobz, int m, int n, void* a, int lda, void* s, void* u, int ldu, void* vt, int ldvt) { return gesdd(matrix_layout, jobz, m, n, static_cast(a), lda, static_cast(s), static_cast(u), ldu, static_cast(vt), ldvt); } //geev //This one is a little tricky. 
The signature is different for the complex //versions than for the real ones. This is because real matrices can have //complex eigenvalues. For the complex types, the eigenvalues are just //returned in argument that's a complex array, but for real types the real //parts of the eigenvalues are returned //in one (array) argument, and the complex parts in a separate argument. //The solution is that the template takes an vi argument, but it is just //ignored in the specializations for complex types. template inline int geev(int matrix_layout, char jobvl, char jobvr, int n, DType* a, int lda, DType* w, DType* wi, DType* vl, int ldvl, DType* vr, int ldvr) { rb_raise(rb_eNotImpError, "not yet implemented for non-BLAS dtypes"); return -1; } template <> inline int geev(int matrix_layout, char jobvl, char jobvr, int n, float* a, int lda, float* w, float* wi, float* vl, int ldvl, float* vr, int ldvr) { return LAPACKE_sgeev(matrix_layout, jobvl, jobvr, n, a, lda, w, wi, vl, ldvl, vr, ldvr); } template <> inline int geev(int matrix_layout, char jobvl, char jobvr, int n, double* a, int lda, double* w, double* wi, double* vl, int ldvl, double* vr, int ldvr) { return LAPACKE_dgeev(matrix_layout, jobvl, jobvr, n, a, lda, w, wi, vl, ldvl, vr, ldvr); } template <> inline int geev(int matrix_layout, char jobvl, char jobvr, int n, Complex64* a, int lda, Complex64* w, Complex64* wi, Complex64* vl, int ldvl, Complex64* vr, int ldvr) { return LAPACKE_cgeev(matrix_layout, jobvl, jobvr, n, a, lda, w, vl, ldvl, vr, ldvr); } template <> inline int geev(int matrix_layout, char jobvl, char jobvr, int n, Complex128* a, int lda, Complex128* w, Complex128* wi, Complex128* vl, int ldvl, Complex128* vr, int ldvr) { return LAPACKE_zgeev(matrix_layout, jobvl, jobvr, n, a, lda, w, vl, ldvl, vr, ldvr); } template inline int lapacke_geev(int matrix_layout, char jobvl, char jobvr, int n, void* a, int lda, void* w, void* wi, void* vl, int ldvl, void* vr, int ldvr) { return geev(matrix_layout, jobvl, jobvr, 
n, static_cast(a), lda, static_cast(w), static_cast(wi), static_cast(vl), ldvl, static_cast(vr), ldvr); } }}} #endif ================================================ FILE: ext/nmatrix_lapacke/math_lapacke.cpp ================================================ ///////////////////////////////////////////////////////////////////// // = NMatrix // // A linear algebra library for scientific computation in Ruby. // NMatrix is part of SciRuby. // // NMatrix was originally inspired by and derived from NArray, by // Masahiro Tanaka: http://narray.rubyforge.org // // == Copyright Information // // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation // // Please see LICENSE.txt for additional copyright notices. // // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == math_lapacke.cpp // // Ruby-exposed CBLAS and LAPACK functions that call BLAS // and LAPACKE functions. // #include "data/data.h" #include "lapacke_nmatrix.h" #include "math_lapacke/cblas_local.h" #include "math/util.h" #include "math_lapacke/cblas_templates_lapacke.h" #include "math_lapacke/lapacke_templates.h" /* * Forward Declarations */ extern "C" { /* BLAS Level 1. */ static VALUE nm_lapacke_cblas_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx); static VALUE nm_lapacke_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx); static VALUE nm_lapacke_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx); static VALUE nm_lapacke_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s); static VALUE nm_lapacke_cblas_rotg(VALUE self, VALUE ab); static VALUE nm_lapacke_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx); /* BLAS Level 2. 
*/ static VALUE nm_lapacke_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda, VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy); /* BLAS Level 3. */ static VALUE nm_lapacke_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc); static VALUE nm_lapacke_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb); static VALUE nm_lapacke_cblas_trmm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n, VALUE alpha, VALUE a, VALUE lda, VALUE b, VALUE ldb); static VALUE nm_lapacke_cblas_herk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a, VALUE lda, VALUE beta, VALUE c, VALUE ldc); static VALUE nm_lapacke_cblas_syrk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a, VALUE lda, VALUE beta, VALUE c, VALUE ldc); /* LAPACK. 
*/ static VALUE nm_lapacke_lapacke_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda); static VALUE nm_lapacke_lapacke_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb); static VALUE nm_lapacke_lapacke_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv); static VALUE nm_lapacke_lapacke_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda); static VALUE nm_lapacke_lapacke_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb); static VALUE nm_lapacke_lapacke_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda); static VALUE nm_lapacke_lapacke_geqrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE tau); static VALUE nm_lapacke_lapacke_ormqr(VALUE self, VALUE order, VALUE side, VALUE trans, VALUE m, VALUE n, VALUE k, VALUE a, VALUE lda, VALUE tau, VALUE c, VALUE ldc); static VALUE nm_lapacke_lapacke_unmqr(VALUE self, VALUE order, VALUE side, VALUE trans, VALUE m, VALUE n, VALUE k, VALUE a, VALUE lda, VALUE tau, VALUE c, VALUE ldc); static VALUE nm_lapacke_lapacke_gesvd(VALUE self, VALUE order, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE superb); static VALUE nm_lapacke_lapacke_gesdd(VALUE self, VALUE order, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt); static VALUE nm_lapacke_lapacke_geev(VALUE self, VALUE order, VALUE jobvl, VALUE jobvr, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr); } extern "C" { /////////////////// // Ruby Bindings // /////////////////// void nm_math_init_lapack() { VALUE cNMatrix_LAPACKE = rb_define_module_under(cNMatrix, "LAPACKE"); VALUE cNMatrix_LAPACKE_LAPACK = rb_define_module_under(cNMatrix_LAPACKE, "LAPACK"); VALUE cNMatrix_LAPACKE_BLAS = 
rb_define_module_under(cNMatrix_LAPACKE, "BLAS"); //BLAS Level 1 rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_scal", (METHOD)nm_lapacke_cblas_scal, 4); rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_nrm2", (METHOD)nm_lapacke_cblas_nrm2, 3); rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_asum", (METHOD)nm_lapacke_cblas_asum, 3); rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_rot", (METHOD)nm_lapacke_cblas_rot, 7); rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_rotg", (METHOD)nm_lapacke_cblas_rotg, 1); rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_imax", (METHOD)nm_lapacke_cblas_imax, 3); //BLAS Level 2 rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_gemv", (METHOD)nm_lapacke_cblas_gemv, 11); //BLAS Level 3 rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_gemm", (METHOD)nm_lapacke_cblas_gemm, 14); rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_trsm", (METHOD)nm_lapacke_cblas_trsm, 12); rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_trmm", (METHOD)nm_lapacke_cblas_trmm, 12); rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_syrk", (METHOD)nm_lapacke_cblas_syrk, 11); rb_define_singleton_method(cNMatrix_LAPACKE_BLAS, "cblas_herk", (METHOD)nm_lapacke_cblas_herk, 11); /* LAPACK Functions */ rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_getrf", (METHOD)nm_lapacke_lapacke_getrf, 5); rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_getrs", (METHOD)nm_lapacke_lapacke_getrs, 9); rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_getri", (METHOD)nm_lapacke_lapacke_getri, 5); rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_potrf", (METHOD)nm_lapacke_lapacke_potrf, 5); rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_potrs", (METHOD)nm_lapacke_lapacke_potrs, 8); rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_potri", (METHOD)nm_lapacke_lapacke_potri, 5); 
rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_geqrf", (METHOD)nm_lapacke_lapacke_geqrf, 6); rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_ormqr", (METHOD)nm_lapacke_lapacke_ormqr, 11); rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_unmqr", (METHOD)nm_lapacke_lapacke_unmqr, 11); rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_gesvd", (METHOD)nm_lapacke_lapacke_gesvd, 13); rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_gesdd", (METHOD)nm_lapacke_lapacke_gesdd, 11); rb_define_singleton_method(cNMatrix_LAPACKE_LAPACK, "lapacke_geev", (METHOD)nm_lapacke_lapacke_geev, 12); } /* * call-seq: * NMatrix::BLAS.cblas_scal(n, alpha, vector, inc) -> NMatrix * * BLAS level 1 function +scal+. Works with all dtypes. * * Scale +vector+ in-place by +alpha+ and also return it. The operation is as * follows: * x <- alpha * x * * - +n+ -> Number of elements of +vector+. * - +alpha+ -> Scalar value used in the operation. * - +vector+ -> NMatrix of shape [n,1] or [1,n]. Modified in-place. * - +inc+ -> Increment used in the scaling function. Should generally be 1. */ static VALUE nm_lapacke_cblas_scal(VALUE self, VALUE n, VALUE alpha, VALUE vector, VALUE incx) { nm::dtype_t dtype = NM_DTYPE(vector); void* scalar = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]); rubyval_to_cval(alpha, dtype, scalar); NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::lapacke::cblas_scal, void, const int n, const void* scalar, void* x, const int incx); ttable[dtype](FIX2INT(n), scalar, NM_STORAGE_DENSE(vector)->elements, FIX2INT(incx)); return vector; } /* * Call any of the cblas_xrotg functions as directly as possible. * * xROTG computes the elements of a Givens plane rotation matrix such that: * * | c s | | a | | r | * | -s c | * | b | = | 0 | * * where r = +- sqrt( a**2 + b**2 ) and c**2 + s**2 = 1. * * The Givens plane rotation can be used to introduce zero elements into a matrix selectively. 
* * This function differs from most of the other raw BLAS accessors. Instead of * providing a, b, c, s as arguments, you should only provide a and b (the * inputs), and you should provide them as the first two elements of any dense * NMatrix type. * * The outputs [c,s] will be returned in a Ruby Array at the end; the input * NMatrix will also be modified in-place. * * This function, like the other cblas_ functions, does minimal type-checking. */ static VALUE nm_lapacke_cblas_rotg(VALUE self, VALUE ab) { static void (*ttable[nm::NUM_DTYPES])(void* a, void* b, void* c, void* s) = { NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations. nm::math::lapacke::cblas_rotg, nm::math::lapacke::cblas_rotg, nm::math::lapacke::cblas_rotg, nm::math::lapacke::cblas_rotg, NULL //nm::math::lapacke::cblas_rotg }; nm::dtype_t dtype = NM_DTYPE(ab); if (!ttable[dtype]) { rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors"); return Qnil; } else { NM_CONSERVATIVE(nm_register_value(&self)); NM_CONSERVATIVE(nm_register_value(&ab)); void *pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]), *pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]); // extract A and B from the NVector (first two elements) void* pA = NM_STORAGE_DENSE(ab)->elements; void* pB = (char*)(NM_STORAGE_DENSE(ab)->elements) + DTYPE_SIZES[dtype]; // c and s are output ttable[dtype](pA, pB, pC, pS); VALUE result = rb_ary_new2(2); if (dtype == nm::RUBYOBJ) { rb_ary_store(result, 0, *reinterpret_cast(pC)); rb_ary_store(result, 1, *reinterpret_cast(pS)); } else { rb_ary_store(result, 0, nm::rubyobj_from_cval(pC, dtype).rval); rb_ary_store(result, 1, nm::rubyobj_from_cval(pS, dtype).rval); } NM_CONSERVATIVE(nm_unregister_value(&ab)); NM_CONSERVATIVE(nm_unregister_value(&self)); return result; } } /* * Call any of the cblas_xrot functions as directly as possible. * * xROT is a BLAS level 1 routine (taking two vectors) which applies a plane rotation. 
* * It's tough to find documentation on xROT. Here are what we think the arguments are for: * * n :: number of elements to consider in x and y * * x :: a vector (expects an NVector) * * incx :: stride of x * * y :: a vector (expects an NVector) * * incy :: stride of y * * c :: cosine of the angle of rotation * * s :: sine of the angle of rotation * * Note that c and s will be the same dtype as x and y, except when x and y are complex. If x and y are complex, c and s * will be float for Complex64 or double for Complex128. * * You probably don't want to call this function. Instead, why don't you try rot, which is more flexible * with its arguments? * * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception * handling, so you can easily crash Ruby! */ static VALUE nm_lapacke_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s) { static void (*ttable[nm::NUM_DTYPES])(const int N, void*, const int, void*, const int, const void*, const void*) = { NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations. nm::math::lapacke::cblas_rot, nm::math::lapacke::cblas_rot, nm::math::lapacke::cblas_rot, nm::math::lapacke::cblas_rot, nm::math::lapacke::cblas_rot }; nm::dtype_t dtype = NM_DTYPE(x); if (!ttable[dtype]) { rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors"); return Qfalse; } else { void *pC, *pS; // We need to ensure the cosine and sine arguments are the correct dtype -- which may differ from the actual dtype. 
if (dtype == nm::COMPLEX64) { pC = NM_ALLOCA_N(float,1); pS = NM_ALLOCA_N(float,1); rubyval_to_cval(c, nm::FLOAT32, pC); rubyval_to_cval(s, nm::FLOAT32, pS); } else if (dtype == nm::COMPLEX128) { pC = NM_ALLOCA_N(double,1); pS = NM_ALLOCA_N(double,1); rubyval_to_cval(c, nm::FLOAT64, pC); rubyval_to_cval(s, nm::FLOAT64, pS); } else { pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]); pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]); rubyval_to_cval(c, dtype, pC); rubyval_to_cval(s, dtype, pS); } ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), NM_STORAGE_DENSE(y)->elements, FIX2INT(incy), pC, pS); return Qtrue; } } /* * Call any of the cblas_xnrm2 functions as directly as possible. * * xNRM2 is a BLAS level 1 routine which calculates the 2-norm of an n-vector x. * * Arguments: * * n :: length of x, must be at least 0 * * x :: pointer to first entry of input vector * * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive) * * You probably don't want to call this function. Instead, why don't you try nrm2, which is more flexible * with its arguments? * * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception * handling, so you can easily crash Ruby! 
*/ static VALUE nm_lapacke_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx) { static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = { NULL, NULL, NULL, NULL, NULL, // no help for integers nm::math::lapacke::cblas_nrm2, nm::math::lapacke::cblas_nrm2, nm::math::lapacke::cblas_nrm2, nm::math::lapacke::cblas_nrm2, nm::math::lapacke::cblas_nrm2 }; nm::dtype_t dtype = NM_DTYPE(x); if (!ttable[dtype]) { rb_raise(nm_eDataTypeError, "this operation undefined for integer vectors"); return Qnil; } else { // Determine the return dtype and allocate it nm::dtype_t rdtype = dtype; if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32; else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64; void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]); ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result); return nm::rubyobj_from_cval(Result, rdtype).rval; } } /* * Call any of the cblas_xasum functions as directly as possible. * * xASUM is a BLAS level 1 routine which calculates the sum of absolute values of the entries * of a vector x. * * Arguments: * * n :: length of x, must be at least 0 * * x :: pointer to first entry of input vector * * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive) * * You probably don't want to call this function. Instead, why don't you try asum, which is more flexible * with its arguments? * * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception * handling, so you can easily crash Ruby! 
*/ static VALUE nm_lapacke_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx) { static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = { nm::math::lapacke::cblas_asum, nm::math::lapacke::cblas_asum, nm::math::lapacke::cblas_asum, nm::math::lapacke::cblas_asum, nm::math::lapacke::cblas_asum, nm::math::lapacke::cblas_asum, nm::math::lapacke::cblas_asum, nm::math::lapacke::cblas_asum, nm::math::lapacke::cblas_asum, nm::math::lapacke::cblas_asum }; nm::dtype_t dtype = NM_DTYPE(x); // Determine the return dtype and allocate it nm::dtype_t rdtype = dtype; if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32; else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64; void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]); ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result); return nm::rubyobj_from_cval(Result, rdtype).rval; } /* * call-seq: * NMatrix::BLAS.cblas_imax(n, vector, inc) -> Fixnum * * BLAS level 1 routine. * * Return the index of the largest element of +vector+. * * - +n+ -> Vector's size. Generally, you can use NMatrix#rows or NMatrix#cols. * - +vector+ -> A NMatrix of shape [n,1] or [1,n] with any dtype. * - +inc+ -> It's the increment used when searching. Use 1 except if you know * what you're doing. */ static VALUE nm_lapacke_cblas_imax(VALUE self, VALUE n, VALUE x, VALUE incx) { NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::lapacke::cblas_imax, int, const int n, const void* x, const int incx); nm::dtype_t dtype = NM_DTYPE(x); int index = ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx)); // Convert to Ruby's Int value. return INT2FIX(index); } /* Call any of the cblas_xgemv functions as directly as possible. * * The cblas_xgemv functions (dgemv, sgemv, cgemv, and zgemv) define the following operation: * * y = alpha*op(A)*x + beta*y * * where op(A) is one of op(A) = A, op(A) = A**T, or the complex conjugate of A. 
* * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128. * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to * expose the ultra-optimized ATLAS versions. * * == Arguments * See: http://www.netlib.org/blas/dgemm.f * * You probably don't want to call this function. Instead, why don't you try cblas_gemv, which is more flexible * with its arguments? * * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception * handling, so you can easily crash Ruby! */ static VALUE nm_lapacke_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE alpha, VALUE a, VALUE lda, VALUE x, VALUE incx, VALUE beta, VALUE y, VALUE incy) { NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::lapacke::cblas_gemv, bool, const enum CBLAS_TRANSPOSE, const int, const int, const void*, const void*, const int, const void*, const int, const void*, void*, const int) nm::dtype_t dtype = NM_DTYPE(a); void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]), *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]); rubyval_to_cval(alpha, dtype, pAlpha); rubyval_to_cval(beta, dtype, pBeta); return ttable[dtype](blas_transpose_sym(trans_a), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), pBeta, NM_STORAGE_DENSE(y)->elements, FIX2INT(incy)) ? Qtrue : Qfalse; } /* Call any of the cblas_xgemm functions as directly as possible. * * The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation: * * C = alpha*op(A)*op(B) + beta*C * * where op(X) is one of op(X) = X, op(X) = X**T, or the complex conjugate of X. * * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128. 
* Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to * expose the ultra-optimized ATLAS versions. * * == Arguments * See: http://www.netlib.org/blas/dgemm.f * * You probably don't want to call this function. Instead, why don't you try gemm, which is more flexible * with its arguments? * * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception * handling, so you can easily crash Ruby! */ static VALUE nm_lapacke_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE alpha, VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE beta, VALUE c, VALUE ldc) { NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::lapacke::cblas_gemm, void, const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, int m, int n, int k, void* alpha, void* a, int lda, void* b, int ldb, void* beta, void* c, int ldc); nm::dtype_t dtype = NM_DTYPE(a); void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]), *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]); rubyval_to_cval(alpha, dtype, pAlpha); rubyval_to_cval(beta, dtype, pBeta); ttable[dtype](blas_order_sym(order), blas_transpose_sym(trans_a), blas_transpose_sym(trans_b), FIX2INT(m), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc)); return c; } static VALUE nm_lapacke_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n, VALUE alpha, VALUE a, VALUE lda, VALUE b, VALUE ldb) { static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_SIDE, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG, const int m, const int n, const void* alpha, const void* a, const int lda, void* b, const int ldb) = { NULL, NULL, NULL, NULL, NULL, // integers not allowed due to 
division nm::math::lapacke::cblas_trsm, nm::math::lapacke::cblas_trsm, cblas_ctrsm, cblas_ztrsm, // call directly, same function signature! nm::math::lapacke::cblas_trsm }; nm::dtype_t dtype = NM_DTYPE(a); if (!ttable[dtype]) { rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices"); } else { void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]); rubyval_to_cval(alpha, dtype, pAlpha); ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb)); } return Qtrue; } static VALUE nm_lapacke_cblas_trmm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n, VALUE alpha, VALUE a, VALUE lda, VALUE b, VALUE ldb) { static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_SIDE, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG, const int m, const int n, const void* alpha, const void* a, const int lda, void* b, const int ldb) = { NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division nm::math::lapacke::cblas_trmm, nm::math::lapacke::cblas_trmm, cblas_ctrmm, cblas_ztrmm, // call directly, same function signature! 
NULL }; nm::dtype_t dtype = NM_DTYPE(a); if (!ttable[dtype]) { rb_raise(nm_eDataTypeError, "this matrix operation not yet defined for non-BLAS dtypes"); } else { void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]); rubyval_to_cval(alpha, dtype, pAlpha); ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb)); } return b; } static VALUE nm_lapacke_cblas_syrk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a, VALUE lda, VALUE beta, VALUE c, VALUE ldc) { static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE, const int n, const int k, const void* alpha, const void* a, const int lda, const void* beta, void* c, const int ldc) = { NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division nm::math::lapacke::cblas_syrk, nm::math::lapacke::cblas_syrk, cblas_csyrk, cblas_zsyrk, // call directly, same function signature! 
NULL }; nm::dtype_t dtype = NM_DTYPE(a); if (!ttable[dtype]) { rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices"); } else { void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]), *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]); rubyval_to_cval(alpha, dtype, pAlpha); rubyval_to_cval(beta, dtype, pBeta); ttable[dtype](blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc)); } return Qtrue; } static VALUE nm_lapacke_cblas_herk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a, VALUE lda, VALUE beta, VALUE c, VALUE ldc) { nm::dtype_t dtype = NM_DTYPE(a); if (dtype == nm::COMPLEX64) { cblas_cherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc)); } else if (dtype == nm::COMPLEX128) { cblas_zherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc)); } else rb_raise(rb_eNotImpError, "this matrix operation undefined for non-complex dtypes"); return Qtrue; } /* Call any of the lapacke_xgetri functions as directly as possible. * * This version (the LAPACKE version) differs from the CLAPACK version in terms of the * input it expects (which is the output of getrf). See getrf for details. * * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception * handling, so you can easily crash Ruby! * * Returns an array giving the pivot indices (normally these are argument #5). 
*/ static VALUE nm_lapacke_lapacke_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv) { static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int n, void* a, const int lda, const int* ipiv) = { NULL, NULL, NULL, NULL, NULL, nm::math::lapacke::lapacke_getri, nm::math::lapacke::lapacke_getri, nm::math::lapacke::lapacke_getri, nm::math::lapacke::lapacke_getri, NULL }; // Allocate the C version of the pivot index array int* ipiv_; if (!RB_TYPE_P(ipiv, T_ARRAY)) { rb_raise(rb_eArgError, "ipiv must be of type Array"); } else { ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv)); for (int index = 0; index < RARRAY_LEN(ipiv); ++index) { ipiv_[index] = FIX2INT( RARRAY_AREF(ipiv, index) ); } } if (!ttable[NM_DTYPE(a)]) { rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes"); } else { ttable[NM_DTYPE(a)](blas_order_sym(order), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv_); } return a; } /* Call any of the lapacke_xgetrf functions as directly as possible. * * The lapacke_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N * matrix A using partial pivoting with row interchanges. * * The factorization has the form: * A = P * L * U * where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), * and U is upper triangular (upper trapezoidal if m < n). * * This version of getrf (the LAPACKE one) differs from the CLAPACK version. The CLAPACK has * different behavior for row-major matrices (the upper matrix has unit diagonals instead of * the lower and it uses column permutations instead of rows). * * This is the right-looking level 3 BLAS version of the algorithm. * * == Arguments * See: http://www.netlib.org/lapack/double/dgetrf.f * (You don't need argument 5; this is the value returned by this function.) * * This function does almost no type checking. Seriously, be really careful when you call it! 
There's no exception * handling, so you can easily crash Ruby! * * Returns an array giving the pivot indices (normally these are argument #5). */ static VALUE nm_lapacke_lapacke_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda) { static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int m, const int n, void* a, const int lda, int* ipiv) = { NULL, NULL, NULL, NULL, NULL, nm::math::lapacke::lapacke_getrf, nm::math::lapacke::lapacke_getrf, nm::math::lapacke::lapacke_getrf, nm::math::lapacke::lapacke_getrf, NULL }; int M = FIX2INT(m), N = FIX2INT(n); // Allocate the pivot index array, which is of size MIN(M, N). size_t ipiv_size = std::min(M,N); int* ipiv = NM_ALLOCA_N(int, ipiv_size); if (!ttable[NM_DTYPE(a)]) { rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices"); } else { ttable[NM_DTYPE(a)](blas_order_sym(order), M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv); } // Result will be stored in a. We return ipiv as an array. VALUE ipiv_array = rb_ary_new2(ipiv_size); for (size_t i = 0; i < ipiv_size; ++i) { rb_ary_store(ipiv_array, i, INT2FIX(ipiv[i])); } return ipiv_array; } /* * Call any of the lapacke_xgetrs functions as directly as possible. 
*/ static VALUE nm_lapacke_lapacke_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb) { static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, char Trans, const int N, const int NRHS, const void* A, const int lda, const int* ipiv, void* B, const int ldb) = { NULL, NULL, NULL, NULL, NULL, nm::math::lapacke::lapacke_getrs, nm::math::lapacke::lapacke_getrs, nm::math::lapacke::lapacke_getrs, nm::math::lapacke::lapacke_getrs, NULL }; // Allocate the C version of the pivot index array int* ipiv_; if (!RB_TYPE_P(ipiv, T_ARRAY)) { rb_raise(rb_eArgError, "ipiv must be of type Array"); } else { ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv)); for (int index = 0; index < RARRAY_LEN(ipiv); ++index) { ipiv_[index] = FIX2INT( RARRAY_AREF(ipiv, index) ); } } if (!ttable[NM_DTYPE(a)]) { rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices"); } else { ttable[NM_DTYPE(a)](blas_order_sym(order), lapacke_transpose_sym(trans), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv_, NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb)); } // b is both returned and modified directly in the argument list. return b; } /* Call any of the LAPACKE_xpotrf functions as directly as possible. * * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception * handling, so you can easily crash Ruby! 
*/ static VALUE nm_lapacke_lapacke_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) { static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, char, const int n, void* a, const int lda) = { NULL, NULL, NULL, NULL, NULL, nm::math::lapacke::lapacke_potrf, nm::math::lapacke::lapacke_potrf, nm::math::lapacke::lapacke_potrf, nm::math::lapacke::lapacke_potrf, NULL }; if (!ttable[NM_DTYPE(a)]) { rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes"); } else { ttable[NM_DTYPE(a)](blas_order_sym(order), lapacke_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda)); } return a; } /* * Call any of the LAPACKE_xpotrs functions as directly as possible. */ static VALUE nm_lapacke_lapacke_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb) { static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, char Uplo, const int N, const int NRHS, const void* A, const int lda, void* B, const int ldb) = { NULL, NULL, NULL, NULL, NULL, nm::math::lapacke::lapacke_potrs, nm::math::lapacke::lapacke_potrs, nm::math::lapacke::lapacke_potrs, nm::math::lapacke::lapacke_potrs, NULL }; if (!ttable[NM_DTYPE(a)]) { rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices"); } else { ttable[NM_DTYPE(a)](blas_order_sym(order), lapacke_uplo_sym(uplo), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb)); } // b is both returned and modified directly in the argument list. return b; } /* Call any of the lapacke_xpotri functions as directly as possible. * * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception * handling, so you can easily crash Ruby! 
*/ static VALUE nm_lapacke_lapacke_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) { static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, char, const int n, void* a, const int lda) = { NULL, NULL, NULL, NULL, NULL, nm::math::lapacke::lapacke_potri, nm::math::lapacke::lapacke_potri, nm::math::lapacke::lapacke_potri, nm::math::lapacke::lapacke_potri, NULL }; if (!ttable[NM_DTYPE(a)]) { rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes"); } else { ttable[NM_DTYPE(a)](blas_order_sym(order), lapacke_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda)); } return a; } //badly need docs for gesvd, gesdd because of the real/complex mixing /* * xGESVD computes the singular value decomposition (SVD) of a real * M-by-N matrix A, optionally computing the left and/or right singular * vectors. The SVD is written * * A = U * SIGMA * transpose(V) * * where SIGMA is an M-by-N matrix which is zero except for its * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA * are the singular values of A; they are real and non-negative, and * are returned in descending order. The first min(m,n) columns of * U and V are the left and right singular vectors of A. * * Note that the routine returns V**T, not V. 
*/
static VALUE nm_lapacke_lapacke_gesvd(VALUE self, VALUE order, VALUE jobu, VALUE jobvt, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt, VALUE superb) {
  static int (*gesvd_table[nm::NUM_DTYPES])(int, char, char, int, int, void* a, int, void* s, void* u, int, void* vt, int, void* superb) = {
    NULL, NULL, NULL, NULL, NULL, // no integer ops
    nm::math::lapacke::lapacke_gesvd, nm::math::lapacke::lapacke_gesvd,
    nm::math::lapacke::lapacke_gesvd, nm::math::lapacke::lapacke_gesvd,
    NULL // no Ruby objects
  };

  nm::dtype_t dtype = NM_DTYPE(a);

  if (!gesvd_table[dtype]) {
    rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
    return Qfalse;
  } else {
    int M = FIX2INT(m),
        N = FIX2INT(n);

    char JOBU = lapack_svd_job_sym(jobu),
         JOBVT = lapack_svd_job_sym(jobvt);

    // s receives the singular values; u and vt the (optional) singular vectors.
    // superb is LAPACKE's scratch output for unconverged superdiagonal elements.
    int info = gesvd_table[dtype](blas_order_sym(order),JOBU, JOBVT, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
                                  NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu),
                                  NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt), NM_STORAGE_DENSE(superb)->elements);
    return INT2FIX(info);
  }
}

// Same calling pattern as gesvd, but dispatches to LAPACKE_xgesdd
// (divide-and-conquer SVD), which takes no superb argument.
static VALUE nm_lapacke_lapacke_gesdd(VALUE self, VALUE order, VALUE jobz, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE s, VALUE u, VALUE ldu, VALUE vt, VALUE ldvt) {
  static int (*gesdd_table[nm::NUM_DTYPES])(int, char, int, int, void* a, int, void* s, void* u, int, void* vt, int) = {
    NULL, NULL, NULL, NULL, NULL, // no integer ops
    nm::math::lapacke::lapacke_gesdd, nm::math::lapacke::lapacke_gesdd,
    nm::math::lapacke::lapacke_gesdd, nm::math::lapacke::lapacke_gesdd,
    NULL // no Ruby objects
  };

  nm::dtype_t dtype = NM_DTYPE(a);

  if (!gesdd_table[dtype]) {
    rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
    return Qfalse;
  } else {
    int M = FIX2INT(m),
        N = FIX2INT(n);

    char JOBZ = lapack_svd_job_sym(jobz);

    int info = gesdd_table[dtype](blas_order_sym(order),JOBZ, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt));
    return INT2FIX(info);
  }
}

/*
 * GEEV computes for an N-by-N real nonsymmetric matrix A, the
 * eigenvalues and, optionally, the left and/or right eigenvectors.
 *
 * The right eigenvector v(j) of A satisfies
 *                  A * v(j) = lambda(j) * v(j)
 * where lambda(j) is its eigenvalue.
 *
 * The left eigenvector u(j) of A satisfies
 *               u(j)**H * A = lambda(j) * u(j)**H
 * where u(j)**H denotes the conjugate transpose of u(j).
 *
 * The computed eigenvectors are normalized to have Euclidean norm
 * equal to 1 and largest component real.
 */
//note on wi
static VALUE nm_lapacke_lapacke_geev(VALUE self, VALUE order, VALUE jobvl, VALUE jobvr, VALUE n, VALUE a, VALUE lda, VALUE w, VALUE wi, VALUE vl, VALUE ldvl, VALUE vr, VALUE ldvr) {
  static int (*geev_table[nm::NUM_DTYPES])(int, char, char, int, void* a, int, void* w, void* wi, void* vl, int, void* vr, int) = {
    NULL, NULL, NULL, NULL, NULL, // no integer ops
    nm::math::lapacke::lapacke_geev, nm::math::lapacke::lapacke_geev,
    nm::math::lapacke::lapacke_geev, nm::math::lapacke::lapacke_geev,
    NULL // no Ruby objects
  };

  nm::dtype_t dtype = NM_DTYPE(a);

  if (!geev_table[dtype]) {
    rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
    return Qfalse;
  } else {
    int N = FIX2INT(n);

    char JOBVL = lapack_evd_job_sym(jobvl),
         JOBVR = lapack_evd_job_sym(jobvr);

    // Raw element pointers; NULL is passed for outputs that were not requested.
    void* A = NM_STORAGE_DENSE(a)->elements;
    void* W = NM_STORAGE_DENSE(w)->elements;
    void* WI = wi == Qnil ? NULL : NM_STORAGE_DENSE(wi)->elements; //For complex, wi should be nil
    void* VL = JOBVL == 'V' ? NM_STORAGE_DENSE(vl)->elements : NULL;
    void* VR = JOBVR == 'V' ? NM_STORAGE_DENSE(vr)->elements : NULL;

    // Perform the actual calculation.
int info = geev_table[dtype](blas_order_sym(order), JOBVL, JOBVR, N, A, FIX2INT(lda), W, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr));

    return INT2FIX(info);
  }
}

/*
 * GEQRF calculates the QR factorization for an MxN real or complex matrix.
 *
 * The QR factorization is A = QR, where Q is orthogonal and R is Upper Triangular
 * +A+ is overwritten with the elements of R and Q with Q being represented by the
 * elements below A's diagonal and an array of scalar factors in the output NMatrix.
 *
 * The matrix Q is represented as a product of elementary reflectors
 * Q = H(1) H(2) . . . H(k), where k = min(m,n).
 *
 * Each H(i) has the form
 *
 *    H(i) = I - tau * v * v'
 *
 * http://www.netlib.org/lapack/explore-html/d3/d69/dgeqrf_8f.html
 */
static VALUE nm_lapacke_lapacke_geqrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda, VALUE tau) {
  static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER order, const int m, const int n, void* a, const int lda, void* tau) = {
    NULL, NULL, NULL, NULL, NULL,
    nm::math::lapacke::lapacke_geqrf, nm::math::lapacke::lapacke_geqrf,
    nm::math::lapacke::lapacke_geqrf, nm::math::lapacke::lapacke_geqrf,
    NULL
  };

  int M = FIX2INT(m),
      N = FIX2INT(n);

  nm::dtype_t dtype = NM_DTYPE(a);

  if (!ttable[dtype]) {
    rb_raise(nm_eDataTypeError, "this matrix operation is undefined for integer matrices");
    return Qfalse;
  } else {
    // a is overwritten with R and the reflector vectors; tau receives the scalar factors.
    int info = ttable[dtype](blas_order_sym(order), M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(tau)->elements);
    return INT2FIX(info);
  }
}

/* ORMQR calculates the orthogonal matrix Q from TAU and A after calling GEQRF on a real matrix
 *
 *
 * The matrix Q is represented as a product of elementary reflectors
 * Q = H(1) H(2) . . . H(k), where k = min(m,n).
* * Each H(i) has the form * * H(i) = I - tau * v * v' * * v is contained in the matrix passed to GEQRF * * www.netlib.org/lapack/explore-html/da/d82/dormqr_8f.html */ static VALUE nm_lapacke_lapacke_ormqr(VALUE self, VALUE order, VALUE side, VALUE trans, VALUE m, VALUE n, VALUE k, VALUE a, VALUE lda, VALUE tau, VALUE c, VALUE ldc) { static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER order, char side, char trans, const int m, const int n, const int k, void* a, const int lda, void* tau, void* c, const int ldc) = { NULL, NULL, NULL, NULL, NULL, nm::math::lapacke::lapacke_ormqr, nm::math::lapacke::lapacke_ormqr, NULL,NULL,NULL // no complex or Ruby objects }; int M = FIX2INT(m), N = FIX2INT(n), K = FIX2INT(k); char SIDE = lapacke_side_sym(side), TRANS = lapacke_transpose_sym(trans); nm::dtype_t dtype = NM_DTYPE(a); if (!ttable[dtype]) { rb_raise(nm_eDataTypeError, "this matrix operation is undefined for integer matrices"); return Qfalse; } else { int info = ttable[dtype](blas_order_sym(order), SIDE, TRANS, M, N, K, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(tau)->elements, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc)); return INT2FIX(info); } } /* UNMQR calculates the orthogonal matrix Q from TAU and A after calling GEQRF on a complex matrix. * * * The matrix Q is represented as a product of elementary reflectors * Q = H(1) H(2) . . . H(k), where k = min(m,n). 
 *
 * Each H(i) has the form
 *
 *    H(i) = I - tau * v * v'
 *
 * v is contained in the matrix passed to GEQRF
 *
 * http://www.netlib.org/lapack/explore-html/d5/d65/zunmqr_8f.html
 */
static VALUE nm_lapacke_lapacke_unmqr(VALUE self, VALUE order, VALUE side, VALUE trans, VALUE m, VALUE n, VALUE k, VALUE a, VALUE lda, VALUE tau, VALUE c, VALUE ldc) {
  // Complex dtypes only: the complex64/complex128 slots are filled.
  static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER order, char side, char trans, const int m, const int n, const int k, void* a, const int lda, void* tau, void* c, const int ldc) = {
    NULL, NULL, NULL, NULL, NULL,NULL,NULL, // no non-complex ops
    nm::math::lapacke::lapacke_unmqr, nm::math::lapacke::lapacke_unmqr,
    NULL // no Ruby objects
  };

  int M = FIX2INT(m), N = FIX2INT(n), K = FIX2INT(k);

  char SIDE = lapacke_side_sym(side),
       TRANS = lapacke_transpose_sym(trans);

  nm::dtype_t dtype = NM_DTYPE(a);

  if (!ttable[dtype]) {
    rb_raise(nm_eDataTypeError, "this matrix operation is valid only for complex datatypes");
    return Qfalse;
  } else {
    // c is updated in place; info is the LAPACKE return code.
    int info = ttable[dtype](blas_order_sym(order), SIDE, TRANS, M, N, K, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(tau)->elements, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
    return INT2FIX(info);
  }
}
}

================================================
FILE: ext/nmatrix_lapacke/nmatrix_lapacke.cpp
================================================

/////////////////////////////////////////////////////////////////////
// = NMatrix
//
// A linear algebra library for scientific computation in Ruby.
// NMatrix is part of SciRuby.
//
// NMatrix was originally inspired by and derived from NArray, by
// Masahiro Tanaka: http://narray.rubyforge.org
//
// == Copyright Information
//
// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
//
// Please see LICENSE.txt for additional copyright notices.
// // == Contributing // // By contributing source code to SciRuby, you agree to be bound by // our Contributor Agreement: // // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement // // == nmatrix_lapacke.cpp // // Main file for nmatrix_lapacke extension // #include #include "nmatrix.h" #include "data/data.h" extern "C" { void nm_math_init_lapack(); void Init_nmatrix_lapacke() { nm_math_init_lapack(); } } ================================================ FILE: lib/nmatrix/atlas.rb ================================================ #-- # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2016, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2016, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == atlas.rb # # ruby file for the nmatrix-atlas gem. Loads the C extension and defines # nice ruby interfaces for ATLAS functions. #++ require 'nmatrix/nmatrix.rb' #need to have nmatrix required first or else bad things will happen require_relative 'lapack_ext_common' NMatrix.register_lapack_extension("nmatrix-atlas") require "nmatrix_atlas.so" class NMatrix #Add functions from the ATLAS C extension to the main LAPACK and BLAS modules. #This will overwrite the original functions where applicable. 
module LAPACK
  class << self
    # Re-export every NMatrix::ATLAS::LAPACK singleton method on NMatrix::LAPACK,
    # overwriting any stub implementations where applicable.
    NMatrix::ATLAS::LAPACK.singleton_methods.each do |m|
      define_method m, NMatrix::ATLAS::LAPACK.method(m).to_proc
    end
  end
end

module BLAS
  class << self
    # Likewise, re-export the ATLAS BLAS singleton methods on NMatrix::BLAS.
    NMatrix::ATLAS::BLAS.singleton_methods.each do |m|
      define_method m, NMatrix::ATLAS::BLAS.method(m).to_proc
    end
  end
end

module LAPACK
  class << self
    # Solve A*X = B for positive-definite A via Cholesky factorization
    # (clapack_potrf followed by clapack_potrs). Returns the solution X;
    # neither +a+ nor +b+ is modified (both are cloned first).
    def posv(uplo, a, b)
      raise(ShapeError, "a must be square") unless a.dim == 2 \
        && a.shape[0] == a.shape[1]
      raise(ShapeError, "number of rows of b must equal number of cols of a") \
        unless a.shape[1] == b.shape[0]
      raise(StorageTypeError, "only works with dense matrices") \
        unless a.stype == :dense && b.stype == :dense
      raise(DataTypeError, "only works for non-integer, non-object dtypes") \
        if a.integer_dtype? || a.object_dtype? || \
          b.integer_dtype? || b.object_dtype?

      x = b.clone
      clone = a.clone
      n = a.shape[0]
      nrhs = b.shape[1]
      clapack_potrf(:row, uplo, n, clone, n)
      # Must transpose b before and after:
      # http://math-atlas.sourceforge.net/faq.html#RowSolve
      x = x.transpose
      clapack_potrs(:row, uplo, n, nrhs, clone, n, x, n)
      x.transpose
    end

    # Eigenvalues and (optionally) eigenvectors of a square dense matrix.
    # +which+ selects which eigenvectors to compute: :both, :left, or :right.
    def geev(matrix, which=:both)
      raise(StorageTypeError, "LAPACK functions only work on dense matrices") \
        unless matrix.dense?
      raise(ShapeError, "eigenvalues can only be computed for square matrices") \
        unless matrix.dim == 2 && matrix.shape[0] == matrix.shape[1]

      jobvl = (which == :both || which == :left) ? :t : false
      jobvr = (which == :both || which == :right) ? :t : false

      n = matrix.shape[0]

      # Outputs
      eigenvalues = NMatrix.new([n, 1], dtype: matrix.dtype) # For real dtypes this holds only the real part of the eigenvalues.
      imag_eigenvalues = matrix.complex_dtype? ? nil : NMatrix.new([n, 1], \
        dtype: matrix.dtype) # For complex dtypes, this is unused.
      left_output = jobvl ? matrix.clone_structure : nil
      right_output = jobvr ? matrix.clone_structure : nil

      # lapack_geev is a pure LAPACK routine so it expects column-major matrices,
      # so we need to transpose the input as well as the output.
temporary_matrix = matrix.transpose
      NMatrix::LAPACK::lapack_geev(jobvl, # compute left eigenvectors of A?
                                   jobvr, # compute right eigenvectors of A? (left eigenvectors of A**T)
                                   n, # order of the matrix
                                   temporary_matrix,# input matrix (used as work)
                                   n, # leading dimension of matrix
                                   eigenvalues,# real part of computed eigenvalues
                                   imag_eigenvalues,# imag part of computed eigenvalues
                                   left_output, # left eigenvectors, if applicable
                                   n, # leading dimension of left_output
                                   right_output, # right eigenvectors, if applicable
                                   n, # leading dimension of right_output
                                   2*n)
      left_output = left_output.transpose if jobvl
      right_output = right_output.transpose if jobvr

      # For real dtypes, transform left_output and right_output into correct forms.
      # If the j'th and the (j+1)'th eigenvalues form a complex conjugate
      # pair, then the j'th and (j+1)'th columns of the matrix are
      # the real and imag parts of the eigenvector corresponding
      # to the j'th eigenvalue.
      if !matrix.complex_dtype?
        # Collect positions whose imaginary part is nonzero, i.e. members of
        # complex-conjugate eigenvalue pairs.
        complex_indices = []
        n.times do |i|
          complex_indices << i if imag_eigenvalues[i] != 0.0
        end

        if !complex_indices.empty?
# For real dtypes, put the real and imaginary parts together eigenvalues = eigenvalues + imag_eigenvalues * \ Complex(0.0,1.0) left_output = left_output.cast(dtype: \ NMatrix.upcast(:complex64, matrix.dtype)) if left_output right_output = right_output.cast(dtype: NMatrix.upcast(:complex64, \ matrix.dtype)) if right_output end complex_indices.each_slice(2) do |i, _| if right_output right_output[0...n,i] = right_output[0...n,i] + \ right_output[0...n,i+1] * Complex(0.0,1.0) right_output[0...n,i+1] = \ right_output[0...n,i].complex_conjugate end if left_output left_output[0...n,i] = left_output[0...n,i] + \ left_output[0...n,i+1] * Complex(0.0,1.0) left_output[0...n,i+1] = left_output[0...n,i].complex_conjugate end end end if which == :both return [eigenvalues, left_output, right_output] elsif which == :left return [eigenvalues, left_output] else return [eigenvalues, right_output] end end def gesvd(matrix, workspace_size=1) result = alloc_svd_result(matrix) m = matrix.shape[0] n = matrix.shape[1] # This is a pure LAPACK function so it expects column-major functions. # So we need to transpose the input as well as the output. matrix = matrix.transpose NMatrix::LAPACK::lapack_gesvd(:a, :a, m, n, matrix, \ m, result[1], result[0], m, result[2], n, workspace_size) result[0] = result[0].transpose result[2] = result[2].transpose result end def gesdd(matrix, workspace_size=nil) min_workspace_size = matrix.shape.min * \ (6 + 4 * matrix.shape.min) + matrix.shape.max workspace_size = min_workspace_size if \ workspace_size.nil? || workspace_size < min_workspace_size result = alloc_svd_result(matrix) m = matrix.shape[0] n = matrix.shape[1] # This is a pure LAPACK function so it expects column-major functions. # So we need to transpose the input as well as the output. 
matrix = matrix.transpose
      NMatrix::LAPACK::lapack_gesdd(:a, m, n, matrix, m, result[1], \
        result[0], m, result[2], n, workspace_size)
      result[0] = result[0].transpose
      result[2] = result[2].transpose

      result
    end
  end
end

# In-place matrix inversion. Uses CLAPACK getrf/getri when the platform's
# ATLAS build provides CLAPACK; otherwise falls back to the internal
# __inverse__ implementation.
def invert!
  raise(StorageTypeError, "invert only works on dense matrices currently") \
    unless self.dense?
  raise(ShapeError, "Cannot invert non-square matrix") \
    unless shape[0] == shape[1]
  raise(DataTypeError, "Cannot invert an integer matrix in-place") \
    if self.integer_dtype?

  # Even though we are using the ATLAS plugin, we still might be missing
  # CLAPACK (and thus clapack_getri) if we are on OS X.
  if NMatrix.has_clapack?
    # Get the pivot array; factor the matrix
    # We can't use getrf! here since it doesn't have the clapack behavior,
    # so it doesn't play nicely with clapack_getri
    n = self.shape[0]
    pivot = NMatrix::LAPACK::clapack_getrf(:row, n, n, self, n)

    # Now calculate the inverse using the pivot array
    NMatrix::LAPACK::clapack_getri(:row, n, self, n, pivot)

    self
  else
    __inverse__(self,true)
  end
end

# In-place Cholesky factorization of a square dense matrix.
# +which+ is forwarded as clapack_potrf's uplo argument (presumably the
# triangle selector — confirm against clapack_potrf's accepted symbols).
def potrf!(which)
  raise(StorageTypeError, "ATLAS functions only work on dense matrices") \
    unless self.dense?
  raise(ShapeError, "Cholesky decomposition only valid for square matrices") \
    unless self.dim == 2 && self.shape[0] == self.shape[1]

  NMatrix::LAPACK::clapack_potrf(:row, which, self.shape[0], self, self.shape[1])
end
end

================================================
FILE: lib/nmatrix/blas.rb
================================================

#--
# = NMatrix
#
# A linear algebra library for scientific computation in Ruby.
# NMatrix is part of SciRuby.
#
# NMatrix was originally inspired by and derived from NArray, by
# Masahiro Tanaka: http://narray.rubyforge.org
#
# == Copyright Information
#
# SciRuby is Copyright (c) 2010 - 2016, Ruby Science Foundation
# NMatrix is Copyright (c) 2012 - 2016, John Woods and the Ruby Science Foundation
#
# Please see LICENSE.txt for additional copyright notices.
# # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == blas.rb # # This file contains the safer accessors for the BLAS functions # supported by NMatrix. #++ module NMatrix::BLAS #Add functions from C extension to main BLAS module class << self if jruby? # BLAS functionalities for JRuby need to be implemented else NMatrix::Internal::BLAS.singleton_methods.each do |m| define_method m, NMatrix::Internal::BLAS.method(m).to_proc end end end class << self # # call-seq: # gemm(a, b) -> NMatrix # gemm(a, b, c) -> NMatrix # gemm(a, b, c, alpha, beta) -> NMatrix # # Updates the value of C via the matrix multiplication # C = (alpha * A * B) + (beta * C) # where +alpha+ and +beta+ are scalar values. # # * *Arguments* : # - +a+ -> Matrix A. # - +b+ -> Matrix B. # - +c+ -> Matrix C. # - +alpha+ -> A scalar value that multiplies A * B. # - +beta+ -> A scalar value that multiplies C. # - +transpose_a+ -> # - +transpose_b+ -> # - +m+ -> # - +n+ -> # - +k+ -> # - +lda+ -> # - +ldb+ -> # - +ldc+ -> # * *Returns* : # - A NMatrix equal to (alpha * A * B) + (beta * C). # * *Raises* : # - +ArgumentError+ -> +a+ and +b+ must be dense matrices. # - +ArgumentError+ -> +c+ must be +nil+ or a dense matrix. # - +ArgumentError+ -> The dtype of the matrices must be equal. # def gemm(a, b, c = nil, alpha = 1.0, beta = 0.0, transpose_a = false, transpose_b = false, m = nil, n = nil, k = nil, lda = nil, ldb = nil, ldc = nil) raise(ArgumentError, 'Expected dense NMatrices as first two arguments.') \ unless a.is_a?(NMatrix) and b.is_a? \ (NMatrix) and a.stype == :dense and b.stype == :dense raise(ArgumentError, 'Expected nil or dense NMatrix as third argument.') \ unless c.nil? or (c.is_a?(NMatrix) \ and c.stype == :dense) raise(ArgumentError, 'NMatrix dtype mismatch.') \ unless a.dtype == b.dtype and (c ? 
a.dtype == c.dtype : true)

      # First, set m, n, and k, which depend on whether we're taking the
      # transpose of a and b.
      if c
        m ||= c.shape[0]
        n ||= c.shape[1]
        k ||= transpose_a ? a.shape[0] : a.shape[1]
      else
        if transpose_a
          # Either :transpose or :complex_conjugate.
          m ||= a.shape[1]
          k ||= a.shape[0]
        else
          # No transpose.
          m ||= a.shape[0]
          k ||= a.shape[1]
        end

        n ||= transpose_b ? b.shape[0] : b.shape[1]
        c = NMatrix.new([m, n], dtype: a.dtype)
      end

      # I think these are independent of whether or not a transpose occurs.
      lda ||= a.shape[1]
      ldb ||= b.shape[1]
      ldc ||= c.shape[1]

      # NM_COMPLEX64 and NM_COMPLEX128 both require complex alpha and beta.
      if a.dtype == :complex64 or a.dtype == :complex128
        alpha = Complex(1.0, 0.0) if alpha == 1.0
        beta = Complex(0.0, 0.0) if beta == 0.0
      end

      # For argument descriptions, see: http://www.netlib.org/blas/dgemm.f
      ::NMatrix::BLAS.cblas_gemm(:row, transpose_a, transpose_b, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)

      return c
    end

    #
    # call-seq:
    #     gemv(a, x) -> NMatrix
    #     gemv(a, x, y) -> NMatrix
    #     gemv(a, x, y, alpha, beta) -> NMatrix
    #
    # Implements matrix-vector product via
    #   y = (alpha * A * x) + (beta * y)
    # where +alpha+ and +beta+ are scalar values.
    #
    # * *Arguments* :
    #   - +a+ -> Matrix A.
    #   - +x+ -> Vector x.
    #   - +y+ -> Vector y.
    #   - +alpha+ -> A scalar value that multiplies A * x.
    #   - +beta+ -> A scalar value that multiplies y.
    #   - +transpose_a+ ->
    #   - +m+ ->
    #   - +n+ ->
    #   - +lda+ ->
    #   - +incx+ ->
    #   - +incy+ ->
    # * *Returns* :
    #   -
    # * *Raises* :
    #   - ++ ->
    #
    def gemv(a, x, y = nil, alpha = 1.0, beta = 0.0, transpose_a = false, m = nil, n = nil, lda = nil, incx = nil, incy = nil)
      raise(ArgumentError, 'Expected dense NMatrices as first two arguments.') \
        unless a.is_a?(NMatrix) and x.is_a?(NMatrix) and \
          a.stype == :dense and x.stype == :dense
      raise(ArgumentError, 'Expected nil or dense NMatrix as third argument.') \
        unless y.nil? \
          or (y.is_a?(NMatrix) and y.stype == :dense)
      raise(ArgumentError, 'NMatrix dtype mismatch.') \
        unless a.dtype == x.dtype and (y ? a.dtype == y.dtype : true)

      m ||= transpose_a == :transpose ? a.shape[1] : a.shape[0]
      n ||= transpose_a == :transpose ? a.shape[0] : a.shape[1]

      raise(ArgumentError, "dimensions don't match") \
        unless x.shape[0] == n && x.shape[1] == 1

      if y
        raise(ArgumentError, "dimensions don't match") \
          unless y.shape[0] == m && y.shape[1] == 1
      else
        y = NMatrix.new([m,1], dtype: a.dtype)
      end

      lda ||= a.shape[1]
      incx ||= 1
      incy ||= 1

      ::NMatrix::BLAS.cblas_gemv(transpose_a, m, n, alpha, a, lda, x, incx, beta, y, incy)

      return y
    end

    #
    # call-seq:
    #     rot(x, y, c, s) -> [NMatrix, NMatrix]
    #
    # Apply plane rotation.
    #
    # * *Arguments* :
    #   - +x+ -> NMatrix
    #   - +y+ -> NMatrix
    #   - +c+ -> cosine of the angle of rotation
    #   - +s+ -> sine of the angle of rotation
    #   - +incx+ -> stride of NMatrix +x+
    #   - +incy+ -> stride of NMatrix +y+
    #   - +n+ -> number of elements to consider in x and y
    #   - +in_place+ -> true if it's okay to modify the supplied
    #     +x+ and +y+ parameters directly;
    #     false if not. Default is false.
    # * *Returns* :
    #   - Array with the results, in the format [xx, yy]
    # * *Raises* :
    #   - +ArgumentError+ -> Expected dense NMatrices as first two arguments.
    #   - +ArgumentError+ -> NMatrix dtype mismatch.
    #   - +ArgumentError+ -> Need to supply n for non-standard incx,
    #     incy values.
    #
    def rot(x, y, c, s, incx = 1, incy = 1, n = nil, in_place=false)
      raise(ArgumentError, 'Expected dense NMatrices as first two arguments.') \
        unless x.is_a?(NMatrix) and y.is_a?(NMatrix) \
          and x.stype == :dense and y.stype == :dense
      raise(ArgumentError, 'NMatrix dtype mismatch.') \
        unless x.dtype == y.dtype
      raise(ArgumentError, 'Need to supply n for non-standard incx, incy values') \
        if n.nil? \
          && incx != 1 && incx != -1 && incy != 1 && incy != -1

      # Default n to the largest count both strided vectors can support.
      n ||= [x.size/incx.abs, y.size/incy.abs].min

      if in_place
        ::NMatrix::BLAS.cblas_rot(n, x, incx, y, incy, c, s)
        return [x,y]
      else
        xx = x.clone
        yy = y.clone
        ::NMatrix::BLAS.cblas_rot(n, xx, incx, yy, incy, c, s)
        return [xx,yy]
      end
    end

    #
    # call-seq:
    #     rot!(x, y, c, s) -> [NMatrix, NMatrix]
    #
    # Apply plane rotation directly to +x+ and +y+.
    #
    # See rot for arguments.
    def rot!(x, y, c, s, incx = 1, incy = 1, n = nil)
      rot(x,y,c,s,incx,incy,n,true)
    end

    #
    # call-seq:
    #     rotg(ab) -> [Numeric, Numeric]
    #
    # Apply givens plane rotation to the coordinates (a,b),
    # returning the cosine and sine of the angle theta.
    #
    # Since the givens rotation includes a square root,
    # integers are disallowed.
    #
    # * *Arguments* :
    #   - +ab+ -> NMatrix with two elements
    # * *Returns* :
    #   - Array with the results, in the format [cos(theta), sin(theta)]
    # * *Raises* :
    #   - +ArgumentError+ -> Expected dense NMatrix of size 2
    #
    def rotg(ab)
      raise(ArgumentError, "Expected dense NMatrix of shape [2,1] or [1,2]") \
        unless ab.is_a?(NMatrix) && ab.stype == :dense && ab.size == 2

      ::NMatrix::BLAS.cblas_rotg(ab)
    end

    #
    # call-seq:
    #     asum(x, incx, n) -> Numeric
    #
    # Calculate the sum of absolute values of the entries of a
    # vector +x+ of size +n+
    #
    # * *Arguments* :
    #   - +x+ -> an NMatrix (will also allow an NMatrix,
    #     but will treat it as if it's a vector )
    #   - +incx+ -> the skip size (defaults to 1)
    #   - +n+ -> the size of +x+ (defaults to +x.size / incx+)
    # * *Returns* :
    #   - The sum
    # * *Raises* :
    #   - +ArgumentError+ -> Expected dense NMatrix for arg 0
    #   - +RangeError+ -> n out of range
    #
    def asum(x, incx = 1, n = nil)
      n ||= x.size / incx
      raise(ArgumentError, "Expected dense NMatrix for arg 0") \
        unless x.is_a?(NMatrix)
      raise(RangeError, "n out of range") \
        if n*incx > x.size || n*incx <= 0 || n <= 0
      ::NMatrix::BLAS.cblas_asum(n, x, incx)
    end

    #
    # call-seq:
    #     nrm2(x, incx, n)
    #
    # Calculate the 2-norm of a vector +x+ of size +n+
    #
    # * *Arguments* :
    #   - +x+ -> an
    #     NMatrix (will also allow an
    #     NMatrix, but will treat it as if it's a vector )
    #   - +incx+ -> the skip size (defaults to 1)
    #   - +n+ -> the size of +x+ (defaults to +x.size / incx+)
    # * *Returns* :
    #   - The 2-norm
    # * *Raises* :
    #   - +ArgumentError+ -> Expected dense NMatrix for arg 0
    #   - +RangeError+ -> n out of range
    #
    def nrm2(x, incx = 1, n = nil)
      n ||= x.size / incx
      raise(ArgumentError, "Expected dense NMatrix for arg 0") \
        unless x.is_a?(NMatrix)
      raise(RangeError, "n out of range") \
        if n*incx > x.size || n*incx <= 0 || n <= 0
      ::NMatrix::BLAS.cblas_nrm2(n, x, incx)
    end

    #
    # call-seq:
    #     scal(alpha, vector, incx, n)
    #
    # Scale a matrix by a given scaling factor
    #
    # * *Arguments* :
    #   - +alpha+ -> a scaling factor
    #   - +vector+ -> an NMatrix
    #   - +incx+ -> the skip size (defaults to 1)
    #   - +n+ -> the size of +x+ (defaults to +x.size / incx+)
    # * *Returns* :
    #   - The scaling result
    # * *Raises* :
    #   - +ArgumentError+ -> Expected dense NMatrix for arg 0
    #   - +RangeError+ -> n out of range
    #
    def scal(alpha, vector, incx=1, n=nil)
      n ||= vector.size / incx
      raise(ArgumentError, "Expected dense NMatrix for arg 0") unless vector.is_a?(NMatrix)
      raise(RangeError, "n out of range") if n*incx > vector.size || n*incx <= 0 || n <= 0
      ::NMatrix::BLAS.cblas_scal(n, alpha, vector, incx)
    end

    # The following are functions that used to be implemented in C, but
    # now require nmatrix-atlas or nmatrix-lapacke to run properly, so we can just
    # implement their stubs in Ruby.
def cblas_trmm(order, side, uplo, trans_a, diag, m, n, alpha, a, lda, b, ldb) raise(NotImplementedError,"cblas_trmm requires either the nmatrix-lapacke or nmatrix-atlas gem") end def cblas_syrk(order, uplo, trans, n, k, alpha, a, lda, beta, c, ldc) raise(NotImplementedError,"cblas_syrk requires either the nmatrix-lapacke or nmatrix-atlas gem") end def cblas_herk(order, uplo, trans, n, k, alpha, a, lda, beta, c, ldc) raise(NotImplementedError,"cblas_herk requires either the nmatrix-lapacke or nmatrix-atlas gem") end end end ================================================ FILE: lib/nmatrix/cruby/math.rb ================================================ #-- # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == math.rb # # Math functionality for NMatrix, along with any NMatrix instance # methods that correspond to ATLAS/BLAS/LAPACK functions (e.g., # laswp). #++ class NMatrix # # call-seq: # getrf! -> Array # # LU factorization of a general M-by-N matrix +A+ using partial pivoting with # row interchanges. The LU factorization is A = PLU, where P is a row permutation # matrix, L is a lower triangular matrix with unit diagonals, and U is an upper # triangular matrix (note that this convention is different from the # clapack_getrf behavior, but matches the standard LAPACK getrf). # +A+ is overwritten with the elements of L and U (the unit # diagonal elements of L are not saved). 
P is not returned directly and must be # constructed from the pivot array ipiv. The row indices in ipiv are indexed # starting from 1. # Only works for dense matrices. # # * *Returns* : # - The IPIV vector. The L and U matrices are stored in A. # * *Raises* : # - +StorageTypeError+ -> ATLAS functions only work on dense matrices. # def getrf! raise(StorageTypeError, "ATLAS functions only work on dense matrices") unless self.dense? #For row-major matrices, clapack_getrf uses a different convention than #described above (U has unit diagonal elements instead of L and columns #are interchanged rather than rows). For column-major matrices, clapack #uses the stanard conventions. So we just transpose the matrix before #and after calling clapack_getrf. #Unfortunately, this is not a very good way, uses a lot of memory. temp = self.transpose ipiv = NMatrix::LAPACK::clapack_getrf(:col, self.shape[0], self.shape[1], temp, self.shape[0]) temp = temp.transpose self[0...self.shape[0], 0...self.shape[1]] = temp #for some reason, in clapack_getrf, the indices in ipiv start from 0 #instead of 1 as in LAPACK. ipiv.each_index { |i| ipiv[i]+=1 } return ipiv end # # call-seq: # geqrf! -> shape.min x 1 NMatrix # # QR factorization of a general M-by-N matrix +A+. # # The QR factorization is A = QR, where Q is orthogonal and R is Upper Triangular # +A+ is overwritten with the elements of R and Q with Q being represented by the # elements below A's diagonal and an array of scalar factors in the output NMatrix. # # The matrix Q is represented as a product of elementary reflectors # Q = H(1) H(2) . . . H(k), where k = min(m,n). # # Each H(i) has the form # # H(i) = I - tau * v * v' # # http://www.netlib.org/lapack/explore-html/d3/d69/dgeqrf_8f.html # # Only works for dense matrices. # # * *Returns* : # - Vector TAU. Q and R are stored in A. Q is represented by TAU and A # * *Raises* : # - +StorageTypeError+ -> LAPACK functions only work on dense matrices. # def geqrf! 
# The real implementation is in lib/nmatrix/lapacke.rb raise(NotImplementedError, "geqrf! requires the nmatrix-lapacke gem") end # # call-seq: # ormqr(tau) -> NMatrix # ormqr(tau, side, transpose, c) -> NMatrix # # Returns the product Q * c or c * Q after a call to geqrf! used in QR factorization. # +c+ is overwritten with the elements of the result NMatrix if supplied. Q is the orthogonal matrix # represented by tau and the calling NMatrix # # Only works on float types, use unmqr for complex types. # # == Arguments # # * +tau+ - vector containing scalar factors of elementary reflectors # * +side+ - direction of multiplication [:left, :right] # * +transpose+ - apply Q with or without transpose [false, :transpose] # * +c+ - NMatrix multplication argument that is overwritten, no argument assumes c = identity # # * *Returns* : # # - Q * c or c * Q Where Q may be transposed before multiplication. # # # * *Raises* : # - +StorageTypeError+ -> LAPACK functions only work on dense matrices. # - +TypeError+ -> Works only on floating point matrices, use unmqr for complex types # - +TypeError+ -> c must have the same dtype as the calling NMatrix # def ormqr(tau, side=:left, transpose=false, c=nil) # The real implementation is in lib/nmatrix/lapacke.rb raise(NotImplementedError, "ormqr requires the nmatrix-lapacke gem") end # # call-seq: # unmqr(tau) -> NMatrix # unmqr(tau, side, transpose, c) -> NMatrix # # Returns the product Q * c or c * Q after a call to geqrf! used in QR factorization. # +c+ is overwritten with the elements of the result NMatrix if it is supplied. Q is the orthogonal matrix # represented by tau and the calling NMatrix # # Only works on complex types, use ormqr for float types. 
#
  # == Arguments
  #
  # * +tau+ - vector containing scalar factors of elementary reflectors
  # * +side+ - direction of multiplication [:left, :right]
  # * +transpose+ - apply Q as Q or its complex conjugate [false, :complex_conjugate]
  # * +c+ - NMatrix multiplication argument that is overwritten; no argument assumes c = identity
  #
  # * *Returns* :
  #   - Q * c or c * Q, where Q may be transformed to its complex conjugate before multiplication.
  #
  # * *Raises* :
  #   - +StorageTypeError+ -> LAPACK functions only work on dense matrices.
  #   - +TypeError+ -> Works only on floating point matrices; use unmqr for complex types.
  #   - +TypeError+ -> c must have the same dtype as the calling NMatrix.
  #
  def unmqr(tau, side=:left, transpose=false, c=nil)
    # The real implementation is in lib/nmatrix/lapacke.rb
    raise(NotImplementedError, "unmqr requires the nmatrix-lapacke gem")
  end

  #
  # call-seq:
  #     potrf!(upper_or_lower) -> NMatrix
  #
  # Cholesky factorization of a symmetric positive-definite matrix -- or, if complex,
  # a Hermitian positive-definite matrix +A+.
  # The result will be written in either the upper or lower triangular portion of the
  # matrix, depending on whether the argument is +:upper+ or +:lower+.
  # Also the function only reads in the upper or lower part of the matrix,
  # so it doesn't actually have to be symmetric/Hermitian.
  # However, if the matrix (i.e. the symmetric matrix implied by the lower/upper
  # half) is not positive-definite, the function will return nonsense.
  #
  # This function requires either the nmatrix-atlas or nmatrix-lapacke gem
  # installed.
  #
  # * *Returns* :
  #   the triangular portion specified by the parameter
  # * *Raises* :
  #   - +StorageTypeError+ -> ATLAS functions only work on dense matrices.
  #   - +ShapeError+ -> Must be square.
  #   - +NotImplementedError+ -> If called without nmatrix-atlas or nmatrix-lapacke gem
  #
  def potrf!(which)
    # The real implementation is in the plugin files.
    raise(NotImplementedError, "potrf! requires either the nmatrix-atlas or nmatrix-lapacke gem")
  end

  # Convenience shortcut for potrf!(:upper).
  def potrf_upper!
    potrf! :upper
  end

  # Convenience shortcut for potrf!(:lower).
  def potrf_lower!
    potrf! :lower
  end

  #
  # call-seq:
  #     factorize_cholesky -> [upper NMatrix, lower NMatrix]
  #
  # Calculates the Cholesky factorization of a matrix and returns the
  # upper and lower matrices such that A=LU and L=U*, where * is
  # either the transpose or conjugate transpose.
  #
  # Unlike potrf!, this method requires that the original matrix is
  # symmetric or Hermitian. However, it is still your responsibility to make
  # sure it is positive-definite.
  def factorize_cholesky
    raise "Matrix must be symmetric/Hermitian for Cholesky factorization" unless self.hermitian?

    l = self.clone.potrf_lower!.tril!
    u = l.conjugate_transpose
    [u, l]
  end

  #
  # call-seq:
  #     factorize_lu -> ...
  #
  # LU factorization of a matrix. Optionally return the permutation matrix.
  # Note that computing the permutation matrix will introduce a slight memory
  # and time overhead.
  #
  # == Arguments
  #
  # +with_permutation_matrix+ - If set to *true* will return the permutation
  # matrix alongwith the LU factorization as a second return value.
  #
  def factorize_lu with_permutation_matrix=nil
    raise(NotImplementedError, "only implemented for dense storage") unless self.stype == :dense
    raise(NotImplementedError, "matrix is not 2-dimensional") unless self.dimensions == 2

    t = self.clone
    pivot = t.getrf!
    return t unless with_permutation_matrix

    [t, FactorizeLUMethods.permutation_matrix_from(pivot)]
  end

  #
  # call-seq:
  #     factorize_qr -> [Q,R]
  #
  # QR factorization of a matrix without column pivoting.
  # Q is orthogonal; R is upper triangular if input is square, or upper trapezoidal if
  # input is rectangular.
  #
  # Only works for dense matrices.
  #
  # * *Returns* :
  #   - Array containing Q and R matrices
  #
  # * *Raises* :
  #   - +StorageTypeError+ -> only implemented for dense storage.
  #   - +ShapeError+ -> Input must be a 2-dimensional matrix to have a QR decomposition.
# def factorize_qr raise(NotImplementedError, "only implemented for dense storage") unless self.stype == :dense raise(ShapeError, "Input must be a 2-dimensional matrix to have a QR decomposition") unless self.dim == 2 rows, columns = self.shape r = self.clone tau = r.geqrf! #Obtain Q q = self.complex_dtype? ? r.unmqr(tau) : r.ormqr(tau) #Obtain R if rows <= columns r.upper_triangle! #Need to account for upper trapezoidal structure if R is a tall rectangle (rows > columns) else r[0...columns, 0...columns].upper_triangle! r[columns...rows, 0...columns] = 0 end [q,r] end # Solve the matrix equation AX = B, where A is +self+, B is the first # argument, and X is returned. A must be a nxn square matrix, while B must be # nxm. Only works with dense matrices and non-integer, non-object data types. # # == Arguments # # * +b+ - the right hand side # # == Options # # * +form+ - Signifies the form of the matrix A in the linear system AX=B. # If not set then it defaults to +:general+, which uses an LU solver. # Other possible values are +:lower_tri+, +:upper_tri+ and +:pos_def+ (alternatively, # non-abbreviated symbols +:lower_triangular+, +:upper_triangular+, # and +:positive_definite+ can be used. # If +:lower_tri+ or +:upper_tri+ is set, then a specialized linear solver for linear # systems AX=B with a lower or upper triangular matrix A is used. If +:pos_def+ is chosen, # then the linear system is solved via the Cholesky factorization. # Note that when +:lower_tri+ or +:upper_tri+ is used, then the algorithm just assumes that # all entries in the lower/upper triangle of the matrix are zeros without checking (which # can be useful in certain applications). 
#
  #
  # == Usage
  #
  #   a = NMatrix.new [2,2], [3,1,1,2], dtype: dtype
  #   b = NMatrix.new [2,1], [9,8], dtype: dtype
  #   a.solve(b)
  #
  #   # solve an upper triangular linear system more efficiently:
  #   require 'benchmark'
  #   require 'nmatrix/lapacke'
  #   rand_mat = NMatrix.random([10000, 10000], dtype: :float64)
  #   a = rand_mat.triu
  #   b = NMatrix.random([10000, 10], dtype: :float64)
  #   Benchmark.bm(10) do |bm|
  #     bm.report('general') { a.solve(b) }
  #     bm.report('upper_tri') { a.solve(b, form: :upper_tri) }
  #   end
  #   #                  user     system      total        real
  #   # general     73.170000   0.670000  73.840000 ( 73.810086)
  #   # upper_tri    0.180000   0.000000   0.180000 (  0.182491)
  #
  def solve(b, opts = {})
    raise(ShapeError, "Must be called on square matrix") unless self.dim == 2 && self.shape[0] == self.shape[1]
    raise(ShapeError, "number of rows of b must equal number of cols of self") if self.shape[1] != b.shape[0]
    raise(ArgumentError, "only works with dense matrices") if self.stype != :dense
    raise(ArgumentError, "only works for non-integer, non-object dtypes") \
      if integer_dtype? || object_dtype? || b.integer_dtype? || b.object_dtype?

    opts = { form: :general }.merge(opts)
    x    = b.clone
    n    = self.shape[0]
    nrhs = b.shape[1]

    case opts[:form]
    when :general
      clone = self.clone
      ipiv = NMatrix::LAPACK.clapack_getrf(:row, n, n, clone, n)
      # When we call clapack_getrs with :row, actually only the first matrix
      # (i.e. clone) is interpreted as row-major, while the other matrix (x)
      # is interpreted as column-major. See here:
      # http://math-atlas.sourceforge.net/faq.html#RowSolve
      # So we must transpose x before and after calling it.
      x = x.transpose
      NMatrix::LAPACK.clapack_getrs(:row, :no_transpose, n, nrhs, clone, n, ipiv, x, n)
      x.transpose
    when :upper_tri, :upper_triangular
      raise(ArgumentError, "upper triangular solver does not work with complex dtypes") if complex_dtype? || b.complex_dtype?
      # this is the correct function call; see https://github.com/SciRuby/nmatrix/issues/374
      NMatrix::BLAS::cblas_trsm(:row, :left, :upper, false, :nounit, n, nrhs, 1.0, self, n, x, nrhs)
      x
    when :lower_tri, :lower_triangular
      raise(ArgumentError, "lower triangular solver does not work with complex dtypes") if complex_dtype? || b.complex_dtype?
      NMatrix::BLAS::cblas_trsm(:row, :left, :lower, false, :nounit, n, nrhs, 1.0, self, n, x, nrhs)
      x
    when :pos_def, :positive_definite
      u, l = self.factorize_cholesky
      z = l.solve(b, form: :lower_tri)
      u.solve(z, form: :upper_tri)
    else
      raise(ArgumentError, "#{opts[:form]} is not a valid form option")
    end
  end

  #
  # call-seq:
  #     least_squares(b) -> NMatrix
  #     least_squares(b, tolerance: 10e-6) -> NMatrix
  #
  # Provides the linear least squares approximation of an under-determined system
  # using QR factorization, provided that the matrix is not rank-deficient.
  #
  # Note: the documented default tolerance now matches the actual keyword
  # default (10e-6); the call-seq previously claimed 10e-10.
  #
  # Only works for dense matrices.
  #
  # * *Arguments* :
  #   - +b+ -> The solution column vector NMatrix of A * X = b.
  #   - +tolerance:+ -> Absolute tolerance to check if a diagonal element in A = QR is near 0.
  #
  # * *Returns* :
  #   - NMatrix that is a column vector with the LLS solution
  #
  # * *Raises* :
  #   - +ArgumentError+ -> least squares approximation only works for non-complex types
  #   - +ShapeError+ -> system must be under-determined ( rows > columns )
  #
  # Examples :-
  #
  #   a = NMatrix.new([3,2], [2.0, 0, -1, 1, 0, 2])
  #
  #   b = NMatrix.new([3,1], [1.0, 0, -1])
  #
  #   a.least_squares(b)
  #     =>[
  #         [ 0.33333333333333326 ]
  #         [ -0.3333333333333334 ]
  #       ]
  #
  def least_squares(b, tolerance: 10e-6)
    raise(ArgumentError, "least squares approximation only works for non-complex types") if self.complex_dtype?

    rows, columns = self.shape
    raise(ShapeError, "system must be under-determined ( rows > columns )") unless rows > columns

    # Perform economical QR factorization
    r = self.clone
    tau = r.geqrf!

    q_transpose_b = r.ormqr(tau, :left, :transpose, b)

    # Obtain R from the geqrf! intermediate
    r[0...columns, 0...columns].upper_triangle!
    r[columns...rows, 0...columns] = 0

    diagonal = r.diagonal
    raise(ArgumentError, "rank deficient matrix") if diagonal.any? { |x| x == 0 }

    if diagonal.any? { |x| x.abs < tolerance }
      warn "warning: A diagonal element of R in A = QR is close to zero ;" <<
        " indicates a possible loss of precision"
    end

    # Transform the system A * X = B to R1 * X = B2 where B2 = Q1_t * B
    r1 = r[0...columns, 0...columns]
    b2 = q_transpose_b[0...columns]
    nrhs = b2.shape[1]

    # Solve the upper triangular system
    NMatrix::BLAS::cblas_trsm(:row, :left, :upper, false, :nounit, r1.shape[0], nrhs, 1.0, r1, r1.shape[0], b2, nrhs)
    b2
  end

  #
  # call-seq:
  #     gesvd! -> [u, sigma, v_transpose]
  #     gesvd! -> [u, sigma, v_conjugate_transpose] # complex
  #
  # Compute the singular value decomposition of a matrix using LAPACK's GESVD function.
  # This is destructive, modifying the source NMatrix. See also #gesdd.
  #
  # Optionally accepts a +workspace_size+ parameter, which will be honored only
  # if it is larger than what LAPACK requires.
  #
  def gesvd!(workspace_size=1)
    NMatrix::LAPACK::gesvd(self, workspace_size)
  end

  #
  # call-seq:
  #     gesvd -> [u, sigma, v_transpose]
  #     gesvd -> [u, sigma, v_conjugate_transpose] # complex
  #
  # Compute the singular value decomposition of a matrix using LAPACK's GESVD function.
  #
  # Optionally accepts a +workspace_size+ parameter, which will be honored only
  # if it is larger than what LAPACK requires.
  #
  def gesvd(workspace_size=1)
    self.clone.gesvd!(workspace_size)
  end

  #
  # call-seq:
  #     gesdd! -> [u, sigma, v_transpose]
  #     gesdd! -> [u, sigma, v_conjugate_transpose] # complex
  #
  # Compute the singular value decomposition of a matrix using LAPACK's GESDD function.
  # This uses a divide-and-conquer strategy. This is destructive, modifying the source
  # NMatrix. See also #gesvd.
  #
  # Optionally accepts a +workspace_size+ parameter, which will be honored only
  # if it is larger than what LAPACK requires.
#
  def gesdd!(workspace_size=nil)
    NMatrix::LAPACK::gesdd(self, workspace_size)
  end

  #
  # call-seq:
  #     gesdd -> [u, sigma, v_transpose]
  #     gesdd -> [u, sigma, v_conjugate_transpose] # complex
  #
  # Compute the singular value decomposition of a matrix using LAPACK's GESDD function.
  # This uses a divide-and-conquer strategy. See also #gesvd.
  #
  # Optionally accepts a +workspace_size+ parameter, which will be honored only
  # if it is larger than what LAPACK requires.
  #
  def gesdd(workspace_size=nil)
    self.clone.gesdd!(workspace_size)
  end

  #
  # call-seq:
  #     laswp!(ary) -> NMatrix
  #
  # In-place permute the columns of a dense matrix using LASWP according to the order given as an array +ary+.
  #
  # If +:convention+ is +:lapack+, then +ary+ represents a sequence of pair-wise permutations which are
  # performed successively. That is, the i'th entry of +ary+ is the index of the column to swap
  # the i'th column with, having already applied all earlier swaps.
  #
  # If +:convention+ is +:intuitive+, then +ary+ represents the order of columns after the permutation.
  # That is, the i'th entry of +ary+ is the index of the column that will be in position i after the
  # reordering (Matlab-like behaviour). This is the default.
  #
  # Not yet implemented for yale or list.
  #
  # == Arguments
  #
  # * +ary+ - An Array specifying the order of the columns. See above for details.
  #
  # == Options
  #
  # * +:convention+ - Possible values are +:lapack+ and +:intuitive+. Default is +:intuitive+. See above for details.
  #
  def laswp!(ary, opts={})
    raise(StorageTypeError, "ATLAS functions only work on dense matrices") unless self.dense?
    opts = { convention: :intuitive }.merge(opts)

    if opts[:convention] == :intuitive
      if ary.length != ary.uniq.length
        raise(ArgumentError, "No duplicated entries in the order array are allowed under convention :intuitive")
      end
      # Convert the "final order" description into the sequence of pair-wise
      # swaps that LASWP expects.
      n = self.shape[1]
      p = []
      order = (0...n).to_a
      0.upto(n-2) do |i|
        p[i] = order.index(ary[i])
        order[i], order[p[i]] = order[p[i]], order[i]
      end
      p[n-1] = n-1
    else
      p = ary
    end

    NMatrix::LAPACK::laswp(self, p)
  end

  #
  # call-seq:
  #     laswp(ary) -> NMatrix
  #
  # Permute the columns of a dense matrix using LASWP according to the order given in an array +ary+.
  #
  # If +:convention+ is +:lapack+, then +ary+ represents a sequence of pair-wise permutations which are
  # performed successively. That is, the i'th entry of +ary+ is the index of the column to swap
  # the i'th column with, having already applied all earlier swaps.
  #
  # If +:convention+ is +:intuitive+, then +ary+ represents the order of columns after the permutation.
  # That is, the i'th entry of +ary+ is the index of the column that will be in position i after the
  # reordering (Matlab-like behaviour). This is the default, since this method
  # simply delegates to #laswp! (whose default convention is +:intuitive+).
  #
  # Not yet implemented for yale or list.
  #
  # == Arguments
  #
  # * +ary+ - An Array specifying the order of the columns. See above for details.
  #
  # == Options
  #
  # * +:convention+ - Possible values are +:lapack+ and +:intuitive+. Default is +:intuitive+. See above for details.
  #
  def laswp(ary, opts={})
    self.clone.laswp!(ary, opts)
  end

  #
  # call-seq:
  #     det -> determinant
  #
  # Calculate the determinant by way of LU decomposition. This is accomplished
  # using clapack_getrf, and then by taking the product of the diagonal elements. There is a
  # risk of underflow/overflow.
  #
  # There are probably also more efficient ways to calculate the determinant.
  # This method requires making a copy of the matrix, since clapack_getrf
  # modifies its input.
  #
  # For smaller matrices, you may be able to use +#det_exact+.
# # This function is guaranteed to return the same type of data in the matrix # upon which it is called. # # Integer matrices are converted to floating point matrices for the purposes of # performing the calculation, as xGETRF can't work on integer matrices. # # * *Returns* : # - The determinant of the matrix. It's the same type as the matrix's dtype. # * *Raises* : # - +ShapeError+ -> Must be used on square matrices. # def det raise(ShapeError, "determinant can be calculated only for square matrices") unless self.dim == 2 && self.shape[0] == self.shape[1] # Cast to a dtype for which getrf is implemented new_dtype = self.integer_dtype? ? :float64 : self.dtype copy = self.cast(:dense, new_dtype) # Need to know the number of permutations. We'll add up the diagonals of # the factorized matrix. pivot = copy.getrf! num_perm = 0 #number of permutations pivot.each_with_index do |swap, i| #pivot indexes rows starting from 1, instead of 0, so need to subtract 1 here num_perm += 1 if swap-1 != i end prod = num_perm % 2 == 1 ? -1 : 1 # odd permutations => negative [shape[0],shape[1]].min.times do |i| prod *= copy[i,i] end # Convert back to an integer if necessary new_dtype != self.dtype ? prod.round : prod #prevent rounding errors end # # call-seq: # complex_conjugate -> NMatrix # complex_conjugate(new_stype) -> NMatrix # # Get the complex conjugate of this matrix. See also complex_conjugate! for # an in-place operation (provided the dtype is already +:complex64+ or # +:complex128+). # # Doesn't work on list matrices, but you can optionally pass in the stype you # want to cast to if you're dealing with a list matrix. # # * *Arguments* : # - +new_stype+ -> stype for the new matrix. # * *Returns* : # - If the original NMatrix isn't complex, the result is a +:complex128+ NMatrix. Otherwise, it's the original dtype. # def complex_conjugate(new_stype = self.stype) self.cast(new_stype, NMatrix::upcast(dtype, :complex64)).complex_conjugate! 
end # # call-seq: # conjugate_transpose -> NMatrix # # Calculate the conjugate transpose of a matrix. If your dtype is already # complex, this should only require one copy (for the transpose). # # * *Returns* : # - The conjugate transpose of the matrix as a copy. # def conjugate_transpose self.transpose.complex_conjugate! end # # call-seq: # absolute_sum -> Numeric # # == Arguments # - +incx+ -> the skip size (defaults to 1, no skip) # - +n+ -> the number of elements to include # # Return the sum of the contents of the vector. This is the BLAS asum routine. def asum incx=1, n=nil if self.shape == [1] return self[0].abs unless self.complex_dtype? return self[0].real.abs + self[0].imag.abs end return method_missing(:asum, incx, n) unless vector? NMatrix::BLAS::asum(self, incx, self.size / incx) end alias :absolute_sum :asum # # call-seq: # norm2 -> Numeric # # == Arguments # - +incx+ -> the skip size (defaults to 1, no skip) # - +n+ -> the number of elements to include # # Return the 2-norm of the vector. This is the BLAS nrm2 routine. def nrm2 incx=1, n=nil return method_missing(:nrm2, incx, n) unless vector? NMatrix::BLAS::nrm2(self, incx, self.size / incx) end alias :norm2 :nrm2 # # call-seq: # scale! -> NMatrix # # == Arguments # - +alpha+ -> Scalar value used in the operation. # - +inc+ -> Increment used in the scaling function. Should generally be 1. # - +n+ -> Number of elements of +vector+. # # This is a destructive method, modifying the source NMatrix. See also #scale. # Return the scaling result of the matrix. BLAS scal will be invoked if provided. def scale!(alpha, incx=1, n=nil) raise(DataTypeError, "Incompatible data type for the scaling factor") unless NMatrix::upcast(self.dtype, NMatrix::min_dtype(alpha)) == self.dtype return NMatrix::BLAS::scal(alpha, self, incx, self.size / incx) if NMatrix::BLAS.method_defined? 
:scal
  # (tail of #scale! — the definition begins before this excerpt)
  # Fallback path: no BLAS scal available, so scale every stored element in place.
  self.each_stored_with_indices do |e, *i|
    self[*i] = e*alpha
  end
end

#
# call-seq:
#     scale -> NMatrix
#
# == Arguments
#   - +alpha+ -> Scalar value used in the operation.
#   - +inc+ -> Increment used in the scaling function. Should generally be 1.
#   - +n+ -> Number of elements of +vector+.
#
# Return the scaling result of the matrix. BLAS scal will be invoked if provided.
def scale(alpha, incx=1, n=nil)
  # Non-destructive version: clone first, then delegate to #scale!.
  return self.clone.scale!(alpha, incx, n)
end

alias :permute_columns :laswp
alias :permute_columns! :laswp!
end

================================================
FILE: lib/nmatrix/enumerate.rb
================================================
#--
# = NMatrix
#
# A linear algebra library for scientific computation in Ruby.
# NMatrix is part of SciRuby.
#
# NMatrix was originally inspired by and derived from NArray, by
# Masahiro Tanaka: http://narray.rubyforge.org
#
# == Copyright Information
#
# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
#
# Please see LICENSE.txt for additional copyright notices.
#
# == Contributing
#
# By contributing source code to SciRuby, you agree to be bound by
# our Contributor Agreement:
#
# * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
#
# == enumerate.rb
#
# Enumeration methods for NMatrix
#++

class NMatrix
  include Enumerable

  ##
  # call-seq:
  #     each -> Enumerator
  #
  # Enumerate through the matrix. @see Enumerable#each
  #
  # For dense, this actually calls a specialized each iterator (in C). For yale and list, it relies upon
  # #each_with_indices (which is about as fast as reasonably possible for C code).
  def each &bl
    if self.stype == :dense
      self.__dense_each__(&bl)
    elsif block_given?
      self.each_with_indices(&bl)
    else # Handle case where no block is given
      # NOTE: `do |params|` binds only the first yielded argument (the element),
      # so the Enumerator yields elements without their indices.
      Enumerator.new do |yielder|
        self.each_with_indices do |params|
          yielder.yield params
        end
      end
    end
  end

  #
  # call-seq:
  #     flat_map -> Enumerator
  #     flat_map { |elem| block } -> Array
  #
  # Maps using Enumerator (returns an Array or an Enumerator)
  alias_method :flat_map, :map

  ##
  # call-seq:
  #     map -> Enumerator
  #     map { |elem| block } -> NMatrix
  #
  # Returns an NMatrix if a block is given. For an Array, use #flat_map
  #
  # Note that #map will always return an :object matrix, because it has no way of knowing
  # how to handle operations on the different dtypes.
  #
  def map(&bl)
    return enum_for(:map) unless block_given?
    # NMatrix-jruby currently supports only doubles
    cp = jruby? ? self : self.cast(dtype: :object)
    cp.map!(&bl)
    cp
  end

  ##
  # call-seq:
  #     map! -> Enumerator
  #     map! { |elem| block } -> NMatrix
  #
  # Maps in place.
  # @see #map
  #
  def map!
    return enum_for(:map!) unless block_given?
    iterated = false
    self.each_stored_with_indices do |e, *i|
      iterated = true
      self[*i] = (yield e)
    end
    #HACK: if there's a single element in a non-dense matrix, it won't iterate and
    #won't change the default value; this ensures that it does get changed.
    unless iterated then
      self.each_with_indices do |e, *i|
        self[*i] = (yield e)
      end
    end
  end

  #
  # call-seq:
  #     each_rank() -> NMatrix
  #     each_rank() { |rank| block } -> NMatrix
  #     each_rank(dimen) -> Enumerator
  #     each_rank(dimen) { |rank| block } -> NMatrix
  #
  # Generic for @each_row, @each_col
  #
  # Iterate through each rank by reference.
  #
  # @param [Fixnum] dimen the rank being iterated over.
  #
  def each_rank(dimen=0, get_by=:reference)
    return enum_for(:each_rank, dimen, get_by) unless block_given?
    # Yields one slice per index along dimension +dimen+.
    (0...self.shape[dimen]).each do |idx|
      yield self.rank(dimen, idx, get_by)
    end
    self
  end
  alias :each_along_dim :each_rank

  #
  # call-seq:
  #     each_row { |row| block } -> NMatrix
  #
  # Iterate through each row, referencing it as an NMatrix slice.
  def each_row(get_by=:reference)
    return enum_for(:each_row, get_by) unless block_given?
    (0...self.shape[0]).each do |i|
      yield self.row(i, get_by)
    end
    self
  end

  #
  # call-seq:
  #     each_column { |column| block } -> NMatrix
  #
  # Iterate through each column, referencing it as an NMatrix slice.
  def each_column(get_by=:reference)
    return enum_for(:each_column, get_by) unless block_given?
    (0...self.shape[1]).each do |j|
      yield self.column(j, get_by)
    end
    self
  end

  #
  # call-seq:
  #     each_layer -> { |column| block } -> ...
  #
  # Iterate through each layer, referencing it as an NMatrix slice.
  #
  # Note: If you have a 3-dimensional matrix, the first dimension contains rows,
  # the second contains columns, and the third contains layers.
  def each_layer(get_by=:reference)
    return enum_for(:each_layer, get_by) unless block_given?
    (0...self.shape[2]).each do |k|
      yield self.layer(k, get_by)
    end
    self
  end

  #
  # call-seq:
  #     each_stored_with_index -> Enumerator
  #
  # Allow iteration across a vector NMatrix's stored values. See also @each_stored_with_indices
  #
  def each_stored_with_index(&block)
    raise(NotImplementedError, "only works for dim 2 vectors") unless self.dim <= 2
    return enum_for(:each_stored_with_index) unless block_given?

    self.each_stored_with_indices do |v, i, j|
      if shape[0] == 1
        # Row vector: the column index j is the meaningful one.
        yield(v,j)
      elsif shape[1] == 1
        # Column vector: the row index i is the meaningful one.
        yield(v,i)
      else
        # Not a vector at all: fall through to method_missing (raises NoMethodError).
        method_missing(:each_stored_with_index, &block)
      end
    end
    self
  end

  ##
  # call-seq:
  #     inject_rank() -> Enumerator
  #     inject_rank(dimen) -> Enumerator
  #     inject_rank(dimen, initial) -> Enumerator
  #     inject_rank(dimen, initial, dtype) -> Enumerator
  #     inject_rank() { |elem| block } -> NMatrix
  #     inject_rank(dimen) { |elem| block } -> NMatrix
  #     inject_rank(dimen, initial) { |elem| block } -> NMatrix
  #     inject_rank(dimen, initial, dtype) { |elem| block } -> NMatrix
  #
  # Reduces an NMatrix using a supplied block over a specified dimension.
  # The block should behave the same way as for Enumerable#reduce.
# # @param [Integer] dimen the dimension being reduced # @param [Numeric] initial the initial value for the reduction # (i.e. the usual parameter to Enumerable#reduce). Supply nil or do not # supply this argument to have it follow the usual Enumerable#reduce # behavior of using the first element as the initial value. # @param [Symbol] dtype if non-nil/false, forces the accumulated result to have this dtype # @return [NMatrix] an NMatrix with the same number of dimensions as the # input, but with the input dimension now having size 1. Each element # is the result of the reduction at that position along the specified # dimension. # def inject_rank(dimen=0, initial=nil, dtype=nil) raise(RangeError, "requested dimension (#{dimen}) does not exist (shape: #{shape})") if dimen > self.dim return enum_for(:inject_rank, dimen, initial, dtype) unless block_given? new_shape = shape.dup new_shape[dimen] = 1 first_as_acc = false if initial then acc = NMatrix.new(new_shape, initial, :dtype => dtype || self.dtype, stype: self.stype) else each_rank(dimen) do |sub_mat| acc = (sub_mat.is_a?(NMatrix) and !dtype.nil? and dtype != self.dtype) ? sub_mat.cast(self.stype, dtype) : sub_mat break end first_as_acc = true end each_rank(dimen) do |sub_mat| if first_as_acc first_as_acc = false next end acc = yield(acc, sub_mat) end acc end alias :reduce_along_dim :inject_rank alias :inject_along_dim :inject_rank end ================================================ FILE: lib/nmatrix/fftw.rb ================================================ #-- # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. 
# # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == fftw.rb # # ruby file for the nmatrix-fftw gem. Loads the C extension and defines # nice ruby interfaces for FFTW functions. #++ require 'nmatrix/nmatrix.rb' require "nmatrix_fftw.so" class NMatrix # Compute 1D FFT of the matrix using FFTW default parameters. # @return [NMatrix] NMatrix of dtype :complex128 containing computed values. # @example Compute 1D FFT of an NMatrix. # nm = NMatrix.new([10], # [ # Complex(9.32,0), Complex(44,0), Complex(125,0), Complex(34,0), # Complex(31,0), Complex(44,0), Complex(12,0), Complex(1,0), # Complex(53.23,0),Complex(-23.23,0) # ], dtype: :complex128) # nm.fft def fft input = self.dtype == :complex128 ? self : self.cast(dtype: :complex128) plan = NMatrix::FFTW::Plan.new([self.size]) plan.set_input input plan.execute plan.output end # Compute 2D FFT of a 2D matrix using FFTW default parameters. # @return [NMatrix] NMatrix of dtype :complex128 containing computed values. def fft2 raise ShapeError, "Shape must be 2 (is #{self.shape})" if self.shape.size != 2 input = self.dtype == :complex128 ? self : self.cast(dtype: :complex128) plan = NMatrix::FFTW::Plan.new(self.shape, dim: 2) plan.set_input input plan.execute plan.output end module FFTW class Plan # Hash which holds the numerical values of constants that determine # the kind of transform that will be computed for a real input/real # output instance. These are one-one mappings to the respective constants # specified in FFTW. For example, for specifying the FFTW_R2HC constant # as the 'kind', pass the symbol :r2hc. 
# # @see http://www.fftw.org/fftw3_doc/Real_002dto_002dReal-Transform-Kinds.html#Real_002dto_002dReal-Transform-Kinds REAL_REAL_FFT_KINDS_HASH = { r2hc: 0, hc2r: 1, dht: 2, redft00: 3, redft01: 4, redft10: 5, redft11: 6, rodft00: 7, rodft01: 9, rodft10: 8, rodft11: 10 } # Hash holding the numerical values of the flags that are passed in the # `flags` argument of a FFTW planner routine. Multiple flags can be passed # to one instance of the planner. Their values are OR'd ('|') and then passed. # For example, for passing the FFTW_ESTIMATE constant, use :estimate. # # nmatrix-fftw supports the following flags into the planning routine: # * :estimate - Equivalent to FFTW_ESTIMATE. Specifies that, instead of # actual measurements of different algorithms, a simple heuristic is # used to pick a (probably sub-optimal) plan quickly. With this flag, # the input/output arrays are not overwritten during planning. # * :measure - Equivalent to FFTW_MEASURE. Tells FFTW to find an optimized # plan by actually computing several FFTs and measuring their execution # time. Depending on your machine, this can take some time (often a few # seconds). # * :patient - Equivalent to FFTW_PATIENT. Like FFTW_MEASURE, but considers # a wider range of algorithms and often produces a “more optimal” plan # (especially for large transforms), but at the expense of several times # longer planning time (especially for large transforms). # * :exhaustive - Equivalent to FFTW_EXHAUSTIVE. Like FFTW_PATIENT, but # considers an even wider range of algorithms, including many that we # think are unlikely to be fast, to produce the most optimal plan but # with a substantially increased planning time. # # @see http://www.fftw.org/fftw3_doc/Planner-Flags.html#Planner-Flags FLAG_VALUE_HASH = { estimate: 64, measure: 0, exhaustive: 8, patient: 32 } # Hash holding numerical values of the direction in which a :complex_complex # type FFT should be performed. 
# # @see http://www.fftw.org/fftw3_doc/Complex-One_002dDimensional-DFTs.html#Complex-One_002dDimensional-DFTs # (The fourth argument, sign, can be either FFTW_FORWARD (-1) or # FFTW_BACKWARD (+1), and indicates the direction of the transform you are # interested in; technically, it is the sign of the exponent in the transform) FFT_DIRECTION_HASH = { forward: -1, backward: 1 } # Hash holding numerical equivalents of the DFT type. Used for determining # DFT type in C level. DATA_TYPE_HASH = { complex_complex: 0, real_complex: 1, complex_real: 2, real_real: 3 } # Array holding valid options that can be passed into NMatrix::FFTW::Plan # so that invalid options aren't passed. VALID_OPTS = [:dim, :type, :direction, :flags, :real_real_kind] # @!attribute [r] shape # @return [Array] Shape of the plan. Sequence of Fixnums. attr_reader :shape # @!attribute [r] size # @return [Numeric] Size of the plan. attr_reader :size # @!attribute [r] type # @return [Symbol] Type of the plan. Can be :complex_complex, # :complex_real, :real_complex or :real_real attr_reader :type # @!attribute [r] direction # @return [Symbol] Can be :forward of :backward. Indicates the direction # of the transform you are interested in; technically, it is the sign of # the exponent in the transform. Valid only for :complex_complex type. attr_reader :direction # @!attribute [r] flags # @return [Array] Can contain one or more symbols from # FLAG_VALUE_HASH. Determines how the planner is prepared. # @see FLAG_VALUE_HASH attr_reader :flags # @!attribute [r] dim # @return [Fixnum] Dimension of the FFT. Should be 1 for 1-D FFT, 2 for # 2-D FFT and so on. attr_reader :dim # @!attribute [r] input # @return [NMatrix] Input NMatrix. Will be valid once the # NMatrix::FFTW::Plan#set_input method has been called. attr_reader :input # @!attribute [r] output # @return [NMatrix] Output NMatrix. Will be valid once the # NMatrix::FFTW::Plan#execute method has been called. 
attr_reader :output

      # @!attribute [r] real_real_kind
      #   @return [Symbol] Specifies the kind of real to real FFT being performed.
      #     This is a symbol from REAL_REAL_FFT_KINDS_HASH. Only valid when type
      #     of transform is of type :real_real.
      #   @see REAL_REAL_FFT_KINDS_HASH
      #   @see http://www.fftw.org/fftw3_doc/Real_002dto_002dReal-Transform-Kinds.html#Real_002dto_002dReal-Transform-Kinds
      attr_reader :real_real_kind

      # Create a plan for a DFT. The FFTW library requires that you first create
      # a plan for performing a DFT, so that FFTW can optimize its algorithms
      # according to your computer's hardware and various user supplied options.
      #
      # @see http://www.fftw.org/doc/Using-Plans.html
      #   For a comprehensive explanation of the FFTW planner.
      # @param shape [Array, Fixnum] Specify the shape of the plan. For 1D
      #   fourier transforms this can be a single number specifying the length of
      #   the input. For multi-dimensional transforms, specify an Array containing
      #   the length of each dimension.
      # @param [Hash] opts the options to create a message with.
      # @option opts [Fixnum] :dim (1) The number of dimensions of the Fourier
      #   transform. If 'shape' has more numbers than :dim, the number of dimensions
      #   specified by :dim will be considered when making the plan.
      # @option opts [Symbol] :type (:complex_complex) The type of transform to
      #   perform based on the input and output data desired. The default value
      #   indicates that a transform is being planned that uses complex numbers
      #   as input and generates complex numbers as output. Similarly you can
      #   use :complex_real, :real_complex or :real_real to specify the kind
      #   of input and output that you will be supplying to the plan.
      # @see DATA_TYPE_HASH
      # @option opts [Symbol, Array] :flags (:estimate) Specify one or more flags
      #   which denote the methodology that is used for deciding the algorithm used
      #   when planning the fourier transform. Use one or more of :estimate, :measure,
      #   :exhaustive and :patient. These flags map to the planner flags specified
      #   at http://www.fftw.org/fftw3_doc/Planner-Flags.html#Planner-Flags.
      # @see REAL_REAL_FFT_KINDS_HASH
      # @option opts [Symbol] :direction (:forward) The direction of a DFT of
      #   type :complex_complex. Technically, it is the sign of the exponent in
      #   the transform. :forward corresponds to -1 and :backward to +1.
      # @see FFT_DIRECTION_HASH
      # @option opts [Array] :real_real_kind When the type of transform is :real_real,
      #   specify the kind of transform that should be performed FOR EACH AXIS
      #   of input. The position of the symbol in the Array corresponds to the
      #   axis of the input. The number of elements in :real_real_kind must be equal to
      #   :dim. Can accept one of the inputs specified in REAL_REAL_FFT_KINDS_HASH.
      # @see REAL_REAL_FFT_KINDS_HASH
      # @see http://www.fftw.org/fftw3_doc/Real_002dto_002dReal-Transform-Kinds.html#Real_002dto_002dReal-Transform-Kinds
      # @example Create a plan for a basic 1D FFT and execute it.
      #   input = NMatrix.new([10],
      #     [
      #       Complex(9.32,0), Complex(44,0), Complex(125,0), Complex(34,0),
      #       Complex(31,0),   Complex(44,0), Complex(12,0),  Complex(1,0),
      #       Complex(53.23,0),Complex(-23.23,0),
      #     ], dtype: :complex128)
      #   plan = NMatrix::FFTW::Plan.new(10)
      #   plan.set_input input
      #   plan.execute
      #   print plan.output
      def initialize shape, opts={}
        verify_opts opts
        opts = {
          dim: 1,
          flags: :estimate,
          direction: :forward,
          type: :complex_complex
        }.merge(opts)

        @type      = opts[:type]
        @dim       = opts[:dim]
        @direction = opts[:direction]
        # Normalize shape to an Array so a bare Fixnum works for 1D plans.
        @shape     = shape.is_a?(Array) ? shape : [shape]
        # Total number of elements covered by the first @dim dimensions.
        @size      = @shape[0...@dim].inject(:*)
        # Normalize flags to an Array so a single Symbol is accepted.
        @flags     = opts[:flags].is_a?(Array) ? opts[:flags] : [opts[:flags]]
        @real_real_kind = opts[:real_real_kind]

        raise ArgumentError, ":real_real_kind option must be specified for :real_real type transforms" if @real_real_kind.nil? and @type == :real_real
        raise ArgumentError, "Specify kind of transform of each axis of input." if @real_real_kind and @real_real_kind.size != @dim
        raise ArgumentError, "dim (#{@dim}) cannot be more than size of shape #{@shape.size}" if @dim > @shape.size

        # C-level plan handle; arguments mirror the fftw planner inputs.
        @plan_data = c_create_plan(@shape, @size, @dim, combine_flags(@flags),
          FFT_DIRECTION_HASH[@direction], DATA_TYPE_HASH[@type], encoded_rr_kind)
      end

      # Set input for the planned DFT.
      # @param [NMatrix] ip An NMatrix specifying the input to the FFT routine.
      #   The data type of the NMatrix must be either :complex128 or :float64
      #   depending on the type of FFT that has been planned. Size must be same
      #   as the size of the planned routine.
      # @raise [ArgumentError] if the input has any storage apart from :dense
      #   or if size/data type of the planned transform and the input matrix
      #   don't match.
      def set_input ip
        raise ArgumentError, "stype must be dense." if ip.stype != :dense
        # FIX: the check is for inequality, but the old message claimed the input
        # could not be "greater than" the planned size — misleading when the
        # input was too small. The message now matches the actual check.
        raise ArgumentError, "size of input (#{ip.size}) must be equal to the planned input size (#{@size})" if ip.size != @size

        case @type
        when :complex_complex, :complex_real
          raise ArgumentError, "dtype must be complex128." if ip.dtype != :complex128
        when :real_complex, :real_real
          raise ArgumentError, "dtype must be float64." if ip.dtype != :float64
        else
          raise "Invalid type #{@type}"
        end

        @input = ip
        c_set_input(ip, @plan_data, DATA_TYPE_HASH[@type])
      end

      # Execute the DFT with the set plan.
      # @return [TrueClass] If all goes well and the fourier transform has been
      #   sucessfully computed, 'true' will be returned and you can access the
      #   computed output from the NMatrix::FFTW::Plan#output accessor.
def execute @output = case @type when :complex_complex @input.clone_structure when :real_complex NMatrix.new([@input.size/2 + 1], dtype: :complex128) when :complex_real, :real_real NMatrix.new([@input.size], dtype: :float64) else raise TypeError, "Invalid type #{@type}" end c_execute(@output, @plan_data, DATA_TYPE_HASH[@type]) end private # Combine flags received from the user (Symbols) into their respective # numeric equivalents and then 'OR' (|) all of them so the resulting number # can be passed directly to the FFTW planner function. def combine_flags flgs temp = 0 flgs.each do |f| temp |= FLAG_VALUE_HASH[f] end temp end # Verify options passed into the constructor to make sure that no invalid # options have been passed. def verify_opts opts unless (opts.keys - VALID_OPTS).empty? raise ArgumentError, "#{opts.keys - VALID_OPTS} are invalid opts." end end # Get the numerical equivalents of the kind of real-real FFT to be computed. def encoded_rr_kind return @real_real_kind.map { |e| REAL_REAL_FFT_KINDS_HASH[e] } if @real_real_kind end end end end ================================================ FILE: lib/nmatrix/homogeneous.rb ================================================ #-- # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == homogeneous.rb # # This file contains optional shortcuts for generating homogeneous # transformations. 
# #++ class NMatrix class << self # # call-seq: # x_rotation(angle_in_radians) -> NMatrix # x_rotation(angle_in_radians, dtype: dtype) -> NMatrix # y_rotation(angle_in_radians) -> NMatrix # y_rotation(angle_in_radians, dtype: dtype) -> NMatrix # z_rotation(angle_in_radians) -> NMatrix # z_rotation(angle_in_radians, dtype: dtype) -> NMatrix # # Generate a 4x4 homogeneous transformation matrix representing a rotation # about the x, y, or z axis respectively. # # * *Arguments* : # - +angle_in_radians+ -> The angle of rotation in radians. # - +dtype+ -> (optional) Default is +:float64+ # * *Returns* : # - A homogeneous transformation matrix consisting of a single rotation. # # Examples: # # NMatrix.x_rotation(Math::PI.quo(6)) # => # 1.0 0.0 0.0 0.0 # 0.0 0.866025 -0.499999 0.0 # 0.0 0.499999 0.866025 0.0 # 0.0 0.0 0.0 1.0 # # # NMatrix.x_rotation(Math::PI.quo(6), dtype: :float32) # => # 1.0 0.0 0.0 0.0 # 0.0 0.866025 -0.5 0.0 # 0.0 0.5 0.866025 0.0 # 0.0 0.0 0.0 1.0 # def x_rotation angle_in_radians, opts={} c = Math.cos(angle_in_radians) s = Math.sin(angle_in_radians) NMatrix.new(4, [1.0, 0.0, 0.0, 0.0, 0.0, c, -s, 0.0, 0.0, s, c, 0.0, 0.0, 0.0, 0.0, 1.0], {dtype: :float64}.merge(opts)) end def y_rotation angle_in_radians, opts={} c = Math.cos(angle_in_radians) s = Math.sin(angle_in_radians) NMatrix.new(4, [ c, 0.0, s, 0.0, 0.0, 1.0, 0.0, 0.0, -s, 0.0, c, 0.0, 0.0, 0.0, 0.0, 1.0], {dtype: :float64}.merge(opts)) end def z_rotation angle_in_radians, opts={} c = Math.cos(angle_in_radians) s = Math.sin(angle_in_radians) NMatrix.new(4, [ c, -s, 0.0, 0.0, s, c, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0], {dtype: :float64}.merge(opts)) end # # call-seq: # translation(x, y, z) -> NMatrix # translation([x,y,z]) -> NMatrix # translation(translation_matrix) -> NMatrix # translation(translation_matrix) -> NMatrix # translation(translation, dtype: dtype) -> NMatrix # translation(x, y, z, dtype: dtype) -> NMatrix # # Generate a 4x4 homogeneous transformation matrix 
representing a translation. # # * *Returns* : # - A homogeneous transformation matrix consisting of a translation. # # Examples: # # NMatrix.translation(4.0,5.0,6.0) # => # 1.0 0.0 0.0 4.0 # 0.0 1.0 0.0 5.0 # 0.0 0.0 1.0 6.0 # 0.0 0.0 0.0 1.0 # # NMatrix.translation(4.0,5.0,6.0, dtype: :int64) # => # 1 0 0 4 # 0 1 0 5 # 0 0 1 6 # 0 0 0 1 # NMatrix.translation(4,5,6) # => # 1 0 0 4 # 0 1 0 5 # 0 0 1 6 # 0 0 0 1 # def translation *args xyz = args.shift if args.first.is_a?(NMatrix) || args.first.is_a?(Array) default_dtype = xyz.respond_to?(:dtype) ? xyz.dtype : NMatrix.guess_dtype(xyz) opts = {dtype: default_dtype} opts = opts.merge(args.pop) if args.size > 0 && args.last.is_a?(Hash) xyz ||= args n = if args.size > 0 NMatrix.eye(4, opts) else NMatrix.eye(4, opts) end n[0..2,3] = xyz n end end # # call-seq: # quaternion -> NMatrix # # Find the quaternion for a 3D rotation matrix. # # Code borrowed from: http://courses.cms.caltech.edu/cs171/quatut.pdf # # * *Returns* : # - A length-4 NMatrix representing the corresponding quaternion. # # Examples: # # n.quaternion # => [1, 0, 0, 0] # def quaternion raise(ShapeError, "Expected square matrix") if self.shape[0] != self.shape[1] raise(ShapeError, "Expected 3x3 rotation (or 4x4 homogeneous) matrix") if self.shape[0] > 4 || self.shape[0] < 3 q = NMatrix.new([4], dtype: self.dtype == :float32 ? :float32: :float64) rotation_trace = self[0,0] + self[1,1] + self[2,2] if rotation_trace >= 0 self_w = self.shape[0] == 4 ? self[3,3] : 1.0 root_of_homogeneous_trace = Math.sqrt(rotation_trace + self_w) q[0] = root_of_homogeneous_trace * 0.5 s = 0.5 / root_of_homogeneous_trace q[1] = (self[2,1] - self[1,2]) * s q[2] = (self[0,2] - self[2,0]) * s q[3] = (self[1,0] - self[0,1]) * s else h = 0 h = 1 if self[1,1] > self[0,0] h = 2 if self[2,2] > self[h,h] case_macro = Proc.new do |i,j,k,ii,jj,kk| qq = NMatrix.new([4], dtype: :float64) self_w = self.shape[0] == 4 ? 
self[3,3] : 1.0 s = Math.sqrt( (self[ii,ii] - (self[jj,jj] + self[kk,kk])) + self_w) qq[i] = s*0.5 s = 0.5 / s qq[j] = (self[ii,jj] + self[jj,ii]) * s qq[k] = (self[kk,ii] + self[ii,kk]) * s qq[0] = (self[kk,jj] - self[jj,kk]) * s qq end case h when 0 q = case_macro.call(1,2,3, 0,1,2) when 1 q = case_macro.call(2,3,1, 1,2,0) when 2 q = case_macro.call(3,1,2, 2,0,1) end self_w = self.shape[0] == 4 ? self[3,3] : 1.0 if self_w != 1 s = 1.0 / Math.sqrt(self_w) q[0] *= s q[1] *= s q[2] *= s q[3] *= s end end q end # # call-seq: # angle_vector -> [angle, about_vector] # # Find the angle vector for a quaternion. Assumes the quaternion has unit length. # # Source: http://www.euclideanspace.com/maths/geometry/rotations/conversions/quaternionToAngle/ # # * *Returns* : # - An angle (in radians) describing the rotation about the +about_vector+. # - A length-3 NMatrix representing the corresponding quaternion. # # Examples: # # q.angle_vector # => [1, 0, 0, 0] # def angle_vector raise(ShapeError, "Expected length-4 vector or matrix (quaternion)") if self.shape[0] != 4 raise("Expected unit quaternion") if self[0] > 1 xyz = NMatrix.new([3], dtype: self.dtype) angle = 2 * Math.acos(self[0]) s = Math.sqrt(1.0 - self[0]*self[0]) xyz[0..2] = self[1..3] xyz /= s if s >= 0.001 # avoid divide by zero return [angle, xyz] end end ================================================ FILE: lib/nmatrix/io/fortran_format.rb ================================================ #-- # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2016, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2016, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. 
# # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == io/matlab/fortran_format.rb # # A parser for making sense of FORTRAN formats. # => Only handles R (real), F (float) and E (exponential) format codes. #++ class NMatrix module IO module FortranFormat # Class for reading strings in FORTRAN format for specifying attributes # of numerical data in a file. Supports F (float), E (exponential) and # R (real). # # == Usage # # p = NMatrix::IO::FortranFormat::Reader.new("(16I5)") # v = p.parse # puts v #=> { :format_code => "INT_ID", # #=> :repeat => 16, # #=> :field_width => 5 } class Reader # Accepts a string in FORTRAN format and initializes the # NMatrix::IO::FortranFormat::Reader object for further parsing of the # data. # # == Arguments # # * +string+ - FORTRAN format string to be parsed. def initialize string @string = string end # Parses the FORTRAN format string passed in initialize and returns # a hash of the results. # # == Result Hash Format # # Take note that some of the below parameters may be absent in the hash # depending on the type of string being parsed. # # * +:format_code+ - A string containing the format code of the read data. # Can be "INT_ID", "FP_ID" or "EXP_ID" # * +:repeat+ - Number of times this format will repeat in a line. # * +:field_width+ - Width of the numerical part of the number. # * +:post_decimal_width+ - Width of the numerals after the decimal point. # * +:exponent_width+ - Width of exponent part of the number. def parse raise(IOError, "Left or right parentheses missing") \ if parentheses_missing? # change tests to handle 'raise' not return @result = {} @string = @string[1..-2] if valid_fortran_format? load_result else raise(IOError, "Invalid FORTRAN format specified. Only Integer, Float or Exponential acceptable.") end @result end private def parentheses_missing? 
true if @string[0] != '(' or @string[-1] != ')' end # Changing any of the following regular expressions can lead to disaster def valid_fortran_format? @mdata = @string.match(/\A(\d*)(I)(\d+)\z/) # check for integer format @mdata = @string.match(/\A(\d*)(F)(\d+)\.(\d+)\z/) \ if @mdata.nil? # check for floating point if not integer @mdata = @string.match(/\A(\d*)(E)(\d+)\.(\d+)(E)?(\d*)\z/) \ if @mdata.nil? # check for exponential format if not floating point @mdata end def load_result if @mdata.to_a.include? "I" create_integer_hash elsif @mdata.to_a.include? "F" create_float_hash else create_exp_hash end end def create_integer_hash @result[:format_code] = "INT_ID" @result[:repeat] = @mdata[1].to_i if !@mdata[1].empty? @result[:field_width] = @mdata[3].to_i end def create_float_hash @result[:format_code] = "FP_ID" @result[:repeat] = @mdata[1].to_i if !@mdata[1].empty? @result[:field_width] = @mdata[3].to_i @result[:post_decimal_width] = @mdata[4].to_i end def create_exp_hash @result[:format_code] = "EXP_ID" @result[:repeat] = @mdata[1].to_i if !@mdata[1].empty? @result[:field_width] = @mdata[3].to_i @result[:post_decimal_width] = @mdata[4].to_i @result[:exponent_width] = @mdata[6].to_i if !@mdata[6].empty? end end end end end ================================================ FILE: lib/nmatrix/io/harwell_boeing.rb ================================================ #-- # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2016, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2016, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. 
#
# == Contributing
#
# By contributing source code to SciRuby, you agree to be bound by
# our Contributor Agreement:
#
# * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
#
# == io/matlab/harwell_boeing.rb
#
# Harwell Boeing file reader (and eventually writer too).
# => Supports only assembled, non-symmetric, real matrices
# => Data types supported are exponential, floating point and integer
# => Returned NMatrix is of type :float64
#++

require_relative './fortran_format.rb'

class NMatrix
  module IO
    module HarwellBoeing
      class << self
        # Loads the contents of a valid Harwell Boeing format file and
        # returns an NMatrix object with the values of the file and optionally
        # only the header info.
        #
        # Supports only assembled, non-symmetric, real matrices. File name must
        # have matrix type as extension.
        #
        # Example - test_file.rua
        #
        # == Arguments
        #
        # * +file_path+ - Path of the Harwell Boeing file to load.
        # * +opts+     - Options for specifying whether you want
        #                the values and header or only the header.
        #
        # == Options
        #
        # * +:header+ - If specified as *true*, will return only the header of
        #               the HB file.Will return the NMatrix object and
        #               header as an array if left blank.
        #
        # == Usage
        #
        #   mat, head = NMatrix::IO::HarwellBoeing.load("test_file.rua")
        #
        #   head      = NMatrix::IO::HarwellBoeing.load("test_file.rua", {header: true})
        #
        # == Alternate Usage
        #
        # You can specify the file using NMatrix::IO::Reader.new("path/to/file")
        # and then call *header* or *values* on the resulting object.
        def load file_path, opts={}
          hb_obj = NMatrix::IO::HarwellBoeing::Reader.new(file_path)

          return hb_obj.header if opts[:header]

          [hb_obj.values, hb_obj.header]
        end
      end

      class Reader
        # Validates the extension (.rua, case-insensitive) and stores the path;
        # no I/O happens until #header or #values is called.
        def initialize file_name
          raise(IOError, "Unsupported file format. Specify file as \
file_name.rua.") if !file_name.match(/.*\.[rR][uU][aA]/)

          @file_name = file_name
          @header    = {}
          @body      = nil
        end

        # Reads and memoizes the 4-line Harwell-Boeing header. Fields are
        # extracted by fixed column windows per the HB specification, then
        # stripped and converted.
        def header
          return @header if !@header.empty?
          @file = File.open @file_name, "r"

          line = @file.gets

          # Line 1: title (cols 0-71) and key (cols 72-79).
          @header[:title] = line[0...72].strip
          @header[:key]   = line[72...80].strip

          line = @file.gets

          # Line 2: card counts, five 14-column fields.
          @header[:totcrd] = line[0...14].strip.to_i
          @header[:ptrcrd] = line[14...28].strip.to_i
          @header[:indcrd] = line[28...42].strip.to_i
          @header[:valcrd] = line[42...56].strip.to_i
          @header[:rhscrd] = line[56...70].strip.to_i

          raise(IOError, "Right hand sides not supported.") \
            if @header[:rhscrd] > 0

          line = @file.gets

          # Line 3: matrix type and dimensions.
          @header[:mxtype] = line[0...3]

          raise(IOError, "Currently supports only real, assembled, unsymmetric \
matrices.") if !@header[:mxtype].match(/RUA/)

          # NOTE(review): the nrow window starts at col 13 rather than 14; since
          # col 13 is blank padding and the field is stripped, the result is the
          # same — TODO confirm against the HB spec before "fixing".
          @header[:nrow]   = line[13...28].strip.to_i
          @header[:ncol]   = line[28...42].strip.to_i
          @header[:nnzero] = line[42...56].strip.to_i
          @header[:neltvl] = line[56...70].strip.to_i

          line = @file.gets

          # Line 4: FORTRAN format strings for pointers, indices, values, RHS.
          fortran_reader = NMatrix::IO::FortranFormat::Reader

          @header[:ptrfmt] = fortran_reader.new(line[0...16].strip).parse
          @header[:indfmt] = fortran_reader.new(line[16...32].strip).parse
          @header[:valfmt] = fortran_reader.new(line[32...52].strip).parse
          @header[:rhsfmt] = fortran_reader.new(line[52...72].strip).parse

          @header
        end

        # Reads the data section (column pointers, row indices, values) and
        # assembles the :float64 NMatrix in CSC fashion.
        def values
          @header = header if @header.empty?
          # Data starts on line 5, right after the 4 header lines.
          @file.lineno = 5 if @file.lineno != 5

          @matrix = NMatrix.new([ @header[:nrow], @header[:ncol] ], 0,
            dtype: :float64)

          read_column_pointers
          read_row_indices
          read_values

          @file.close

          assemble_matrix

          @matrix
        end

        private

        # Reads the 1-based column pointer array and shifts it to 0-based.
        def read_column_pointers
          @col_ptrs = []
          pointer_lines     = @header[:ptrcrd]
          pointers_per_line = @header[:ptrfmt][:repeat]
          pointer_width     = @header[:ptrfmt][:field_width]

          @col_ptrs = read_numbers :to_i, pointer_lines, pointers_per_line,
            pointer_width

          # NOTE(review): `c -= 1` inside map! relies on the block's return
          # value; `c - 1` would read more clearly — behavior is identical.
          @col_ptrs.map! {|c| c -= 1}
        end

        # Reads the 1-based row index array and shifts it to 0-based.
        def read_row_indices
          @row_indices = []
          row_lines        = @header[:indcrd]
          indices_per_line = @header[:indfmt][:repeat]
          row_width        = @header[:indfmt][:field_width]

          @row_indices = read_numbers :to_i, row_lines, indices_per_line,
            row_width

          @row_indices.map! {|r| r -= 1}
        end

        # Reads the nonzero values as floats.
        def read_values
          @vals = []
          value_lines     = @header[:valcrd]
          values_per_line = @header[:valfmt][:repeat]
          value_width     = @header[:valfmt][:field_width]

          @vals = read_numbers :to_f, value_lines, values_per_line,
            value_width
        end

        # Generic fixed-width number reader: reads +num_of_lines+ lines, slicing
        # each into windows of +number_width+ characters and converting each
        # slice with +to_dtype+ (:to_i or :to_f). Stops early on short lines.
        def read_numbers to_dtype, num_of_lines, numbers_per_line, number_width
          data = []

          num_of_lines.times do
            line = @file.gets
            index = 0

            numbers_per_line.times do
              delimiter = index + number_width
              data << line[index...delimiter].strip.send(to_dtype)
              break if line.length <= delimiter
              index += number_width
            end
          end

          data
        end

        # Scatter the CSC triplets into the dense-indexed @matrix: for each
        # column, the pointer pair [col_ptrs[i], col_ptrs[i+1]) delimits its
        # entries in @row_indices/@vals.
        def assemble_matrix
          col = 0
          @col_ptrs[0..-2].each_index do |index|
            @col_ptrs[index].upto(@col_ptrs[index+1] - 1) do |row_ptr|
              row = @row_indices[row_ptr]
              @matrix[row, col] = @vals[row_ptr]
            end

            col += 1
          end
        end
      end
    end
  end
end

================================================
FILE: lib/nmatrix/io/market.rb
================================================
# = NMatrix
#
# A linear algebra library for scientific computation in Ruby.
# NMatrix is part of SciRuby.
#
# NMatrix was originally inspired by and derived from NArray, by
# Masahiro Tanaka: http://narray.rubyforge.org
#
# == Copyright Information
#
# SciRuby is Copyright (c) 2010 - 2016, Ruby Science Foundation
# NMatrix is Copyright (c) 2012 - 2016, John Woods and the Ruby Science Foundation
#
# Please see LICENSE.txt for additional copyright notices.
#
# == Contributing
#
# By contributing source code to SciRuby, you agree to be bound by
# our Contributor Agreement:
#
# * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
#
# == io/market.rb
#
# MatrixMarket reader and writer.
#
#++

# Matrix Market is a repository of test data for use in studies of algorithms
# for numerical linear algebra. There are 3 file formats used:
#
# - Matrix Market Exchange Format.
# - Harwell-Boeing Exchange Format.
# - Coordinate Text File Format. (to be phased out)
#
# This module can load and save the first format. We might support
# Harwell-Boeing in the future.
# # The MatrixMarket format is documented in: # * http://math.nist.gov/MatrixMarket/formats.html module NMatrix::IO::Market CONVERTER_AND_DTYPE = { :real => [:to_f, :float64], :complex => [:to_c, :complex128], :integer => [:to_i, :int64], :pattern => [:to_i, :byte] } #:nodoc: ENTRY_TYPE = { :byte => :integer, :int8 => :integer, :int16 => :integer, :int32 => :integer, :int64 => :integer,:float32 => :real, :float64 => :real, :complex64 => :complex, :complex128 => :complex } #:nodoc: class << self # call-seq: # load(filename) -> NMatrix # # Load a MatrixMarket file. Requires a +filename+ as an argument. # # * *Arguments* : # - +filename+ -> String with the filename to be saved. # * *Raises* : # - +IOError+ -> expected type code line beginning with '%%MatrixMarket matrix' def load(filename) f = File.new(filename, "r") header = f.gets header.chomp! raise(IOError, "expected type code line beginning with '%%MatrixMarket matrix'") \ if header !~ /^\%\%MatrixMarket\ matrix/ header = header.split entry_type = header[3].downcase.to_sym symmetry = header[4].downcase.to_sym converter, default_dtype = CONVERTER_AND_DTYPE[entry_type] if header[2] == 'coordinate' load_coordinate f, converter, default_dtype, entry_type, symmetry else load_array f, converter, default_dtype, entry_type, symmetry end end # call-seq: # save(matrix, filename, options = {}) -> true # # Can optionally set :symmetry to :general, :symmetric, :hermitian; and can # set :pattern => true if you're writing a sparse matrix and don't want # values stored. # # * *Arguments* : # - +matrix+ -> NMatrix with the data to be saved. # - +filename+ -> String with the filename to be saved. # * *Raises* : # - +DataTypeError+ -> MatrixMarket does not support Ruby objects. # - +ArgumentError+ -> Expected two-dimensional NMatrix. def save(matrix, filename, options = {}) options = {:pattern => false, :symmetry => :general}.merge(options) mode = matrix.stype == :dense ? 
:array : :coordinate if [:object].include?(matrix.dtype) raise(DataTypeError, "MatrixMarket does not support Ruby objects") end entry_type = options[:pattern] ? :pattern : ENTRY_TYPE[matrix.dtype] raise(ArgumentError, "expected two-dimensional NMatrix") \ if matrix.dim != 2 f = File.new(filename, 'w') f.puts "%%MatrixMarket matrix #{mode} #{entry_type} #{options[:symmetry]}" if matrix.stype == :dense save_array matrix, f, options[:symmetry] elsif [:list,:yale].include?(matrix.stype) save_coordinate matrix, f, options[:symmetry], options[:pattern] end f.close true end protected def save_coordinate matrix, file, symmetry, pattern # Convert to a hash in order to store rows = matrix.to_h # Count non-zeros count = 0 rows.each_pair do |i, columns| columns.each_pair do |j, val| next if symmetry != :general && j > i count += 1 end end # Print dimensions and non-zeros file.puts "#{matrix.shape[0]}\t#{matrix.shape[1]}\t#{count}" # Print coordinates rows.each_pair do |i, columns| columns.each_pair do |j, val| next if symmetry != :general && j > i file.puts(pattern ? "\t#{i+1}\t#{j+1}" : "\t#{i+1}\t#{j+1}\t#{val}") end end file end def save_array matrix, file, symmetry file.puts [matrix.shape[0], matrix.shape[1]].join("\t") if symmetry == :general (0...matrix.shape[1]).each do |j| (0...matrix.shape[0]).each do |i| file.puts matrix[i,j] end end else # :symmetric, :'skew-symmetric', :hermitian (0...matrix.shape[1]).each do |j| (j...matrix.shape[0]).each do |i| file.puts matrix[i,j] end end end file end def load_array file, converter, dtype, entry_type, symmetry mat = nil line = file.gets line.chomp! line.lstrip! 
fields = line.split mat = NMatrix.new :dense, [fields[0].to_i, fields[1].to_i], dtype (0...mat.shape[1]).each do |j| (0...mat.shape[0]).each do |i| datum = file.gets.chomp.send(converter) mat[i,j] = datum unless i == j || symmetry == :general if symmetry == :symmetric mat[j,i] = datum elsif symmetry == :hermitian mat[j,i] = Complex.new(datum.real, -datum.imag) elsif symmetry == :'skew-symmetric' mat[j,i] = -datum end end end end file.close mat end # Creates a :list NMatrix from a coordinate-list MatrixMarket file. def load_coordinate file, converter, dtype, entry_type, symmetry mat = nil # Read until we get the dimensions and nonzeros while line = file.gets line.chomp! line.lstrip! line, comment = line.split('%', 2) # ignore comments if line.size > 4 shape0, shape1 = line.split mat = NMatrix.new(:list, [shape0.to_i, shape1.to_i], 0, dtype) break end end # Now read the coordinates while line = file.gets line.chomp! line.lstrip! line, comment = line.split('%', 2) # ignore comments next unless line.size >= 5 # ignore empty lines fields = line.split i = fields[0].to_i - 1 j = fields[1].to_i - 1 datum = entry_type == :pattern ? 1 : fields[2].send(converter) mat[i, j] = datum # add to the matrix unless i == j || symmetry == :general if symmetry == :symmetric mat[j, i] = datum elsif symmetry == :'skew-symmetric' mat[j, i] = -datum elsif symmetry == :hermitian mat[j, i] = Complex.new(datum.real, -datum.imag) end end end file.close mat end end end ================================================ FILE: lib/nmatrix/io/mat5_reader.rb ================================================ #-- # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. 
#
# NMatrix was originally inspired by and derived from NArray, by
# Masahiro Tanaka: http://narray.rubyforge.org
#
# == Copyright Information
#
# SciRuby is Copyright (c) 2010 - 2016, Ruby Science Foundation
# NMatrix is Copyright (c) 2012 - 2016, John Woods and the Ruby Science Foundation
#
# Please see LICENSE.txt for additional copyright notices.
#
# == Contributing
#
# By contributing source code to SciRuby, you agree to be bound by
# our Contributor Agreement:
#
# * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
#
# == io/matlab/mat5_reader.rb
#
# Matlab version 5 .mat file reader (and eventually writer too).
#
#++

require_relative './mat_reader.rb'

module NMatrix::IO::Matlab
  # Reader (and eventual writer) for a version 5 .mat file.
  class Mat5Reader < MatReader #:nodoc:
    attr_reader :file_header, :first_tag_field, :first_data_field

    # A zlib-compressed element of a v5 .mat file (mdtype :miCOMPRESSED).
    # Holds either the uncompressed content (String) or a byte count
    # (Integer), and deflates/inflates lazily on demand.
    class Compressed #:nodoc:
      include Packable

      attr_reader :byte_order

      # +content_or_bytes+ seeds either @content (when a String) or
      # @padded_bytes (when an Integer); +stream+/+byte_order+ are simply
      # stored.
      def initialize(stream = nil, byte_order = nil, content_or_bytes = nil)
        @stream = stream
        @byte_order = byte_order

        if content_or_bytes.is_a?(String)
          @content = content_or_bytes
        elsif content_or_bytes.is_a?(Integer)
          @padded_bytes = content_or_bytes
        end
      end

      # Deflated form of +content+, memoized.
      def compressed
        require "zlib"
        # [2..-5] removes headers
        @compressed ||= Zlib::Deflate.deflate(content)
      end

      # Inflated content, memoized; lazily extracted from @compressed.
      def content
        @content ||= extract
      end

      # Content size rounded up to the next multiple of 4 (MAT element
      # alignment).
      def padded_bytes
        @padded_bytes ||= content.size % 4 == 0 ? content.size : (content.size / 4 + 1) * 4
      end

      # Packable hook: writes the deflated bytes.
      def write_packed(packedio, options = {})
        packedio << [compressed, {:bytes => padded_bytes}.merge(options)]
      end

      # Packable hook: reads the deflated bytes, then inflates immediately
      # via #content.
      def read_packed(packedio, options)
        @compressed = (packedio >> [String, options]).first
        content
      end

      protected

      # Inflates @compressed with zlib, closing the stream afterwards.
      def extract
        require 'zlib'
        zstream = Zlib::Inflate.new #(-Zlib::MAX_WBITS) # No header

        returning(zstream.inflate(@compressed)) do
          zstream.finish
          zstream.close
        end
      end
    end

    # Field layout for MatrixData below: MATLAB array flags, dimensions,
    # name, and the raw real/imaginary/index sub-elements.
    MatrixDataStruct = Struct.new(
      :cells, :logical, :global, :complex, :nonzero_max, :matlab_class,
      :dimensions, :matlab_name, :real_part, :imaginary_part,
      :row_index, :column_index)

    # A matrix element (mdtype :miMATRIX) of a v5 .mat file.
    class MatrixData < MatrixDataStruct #:nodoc:
      include Packable

      # Writing is not implemented; the statement after the raise is
      # intentionally unreachable scaffolding for a future implementation.
      def write_packed(packedio, options)
        raise NotImplementedError
        packedio << [info, {:bytes => padded_bytes}.merge(options)]
      end

      # call-seq:
      #     to_ruby -> NMatrix
      #     to_ruby -> Array
      #
      # Figure out the appropriate Ruby type to convert to, and do it. There
      # are basically two possible types: +NMatrix+ and +Array+. This method
      # is recursive, so an +Array+ is going to contain other +Array+s and/or
      # +NMatrix+ objects.
      #
      # mxCELL types (cells) will be converted to the Array type.
      #
      # mxSPARSE and other types will be converted to NMatrix, with the
      # appropriate stype (:yale or :dense, respectively).
      #
      # See also to_nm, which is responsible for NMatrix instantiation.
      def to_ruby
        case matlab_class
        when :mxSPARSE then return to_nm
        when :mxCELL then return self.cells.collect { |c| c.to_ruby }
        else return to_nm
        end
      end

      # call-seq:
      #     guess_dtype_from_mdtype -> Symbol
      #
      # Try to determine what dtype and such to use.
      #
      # TODO: Needs to be verified that unsigned MATLAB types are being
      # converted to the correct NMatrix signed dtypes.
      def guess_dtype_from_mdtype
        dtype = MatReader::MDTYPE_TO_DTYPE[self.real_part.tag.data_type]

        return dtype unless self.complex

        dtype == :float32 ? :complex64 : :complex128
      end

      #
      # call-seq:
      #     unpacked_data(real_mdtype = nil, imag_mdtype = nil) ->
      #
      # Unpacks data without repacking it.
#
# Used only for dense matrix creation. Yale matrix creation uses
# repacked_data.
#
def unpacked_data(real_mdtype = nil, imag_mdtype = nil)
  # Get Matlab data type and unpack args
  real_mdtype ||= self.real_part.tag.data_type
  real_unpack_args = MatReader::MDTYPE_UNPACK_ARGS[real_mdtype]

  # zip real and complex components together, or just return real component
  if self.complex
    imag_mdtype ||= self.imaginary_part.tag.data_type
    imag_unpack_args = MatReader::MDTYPE_UNPACK_ARGS[imag_mdtype]

    unpacked_real = self.real_part.data.unpack(real_unpack_args)
    unpacked_imag = self.imaginary_part.data.unpack(imag_unpack_args)

    unpacked_real.zip(unpacked_imag).flatten
  else
    length = self.dimensions.inject(1) { |a,b| a * b } # get the product
    self.real_part.data.unpack(*(real_unpack_args*length))
  end
end

# Unpacks and repacks data into the appropriate format for NMatrix.
#
# If data is already in the appropriate format, does not unpack or
# repack, just returns directly.
#
# Complex is always unpacked and repacked, as the real and imaginary
# components must be merged together (MATLAB stores them separately for
# some crazy reason).
#
# Used only for Yale storage creation. For dense, see unpacked_data.
#
# This function calls repack and complex_merge, which are both defined in
# io.cpp.
def repacked_data(to_dtype = nil)
  real_mdtype = self.real_part.tag.data_type

  # Figure out what dtype to use based on the MATLAB data-types
  # (mdtypes). They could be different for real and imaginary, so call
  # upcast to figure out what to use.

  components = [] # real and imaginary parts or just the real part

  if self.complex
    imag_mdtype = self.imaginary_part.tag.data_type

    # Make sure we convert both mdtypes do the same dtype
    to_dtype ||= NMatrix.upcast(MatReader::MDTYPE_TO_DTYPE[real_mdtype], \
      MatReader::MDTYPE_TO_DTYPE[imag_mdtype])

    # Let's make sure we don't try to send NMatrix complex integers.
    # We need complex floating points.
    unless [:float32, :float64].include?(to_dtype)
      to_dtype = NMatrix.upcast(to_dtype, :float32)
    end

    # Repack the imaginary part
    components[1] = ::NMatrix::IO::Matlab.repack( self.imaginary_part.data, \
      imag_mdtype, :dtype => to_dtype )
  else
    to_dtype ||= MatReader::MDTYPE_TO_DTYPE[real_mdtype]

    # Sometimes repacking isn't necessary -- sometimes the format is already good
    if MatReader::NO_REPACK.include?(real_mdtype)
      return [self.real_part.data, to_dtype]
    end
  end

  # Repack the real part
  components[0] = ::NMatrix::IO::Matlab.repack( \
    self.real_part.data, real_mdtype, :dtype => to_dtype )

  # Merge the two parts if complex, or just return the real part.
  [self.complex ? ::NMatrix::IO::Matlab.complex_merge( \
    components[0], components[1], to_dtype ) : components[0],
   to_dtype]
end

# Unpacks and repacks index data into the appropriate format for NMatrix.
#
# If data is already in the appropriate format, does not unpack or
# repack, just returns directly.
def repacked_indices
  repacked_row_indices = ::NMatrix::IO::Matlab.repack( \
    self.row_index.data, :miINT32, :itype )
  repacked_col_indices = ::NMatrix::IO::Matlab.repack( \
    self.column_index.data, :miINT32, :itype )

  [repacked_row_indices, repacked_col_indices]
end

#
# call-seq:
#     to_nm(dtype = nil) -> NMatrix
#
# Create an NMatrix from a MATLAB .mat (v5) matrix.
#
# This function matches the storage type exactly. That is, a regular
# matrix in MATLAB will be a dense NMatrix, and a sparse (old Yale) one
# in MATLAB will be a :yale (new Yale) matrix in NMatrix.
#
# Note that NMatrix has no old Yale type, so this uses a semi-hidden
# version of the NMatrix constructor to pass in --- as directly as
# possible -- the stored bytes in a MATLAB sparse matrix. This
# constructor should also be used for other IO formats that want to
# create sparse matrices from IA and JA vectors (e.g., SciPy).
#
# This is probably not the fastest code. An ideal solution would be a C
# plugin of some sort for reading the MATLAB .mat file. However, .mat v5
# is a really complicated format, and lends itself to an object-oriented
# solution.
#
def to_nm(dtype = nil)
  # Hardest part is figuring out from_dtype, from_index_dtype, and dtype.
  dtype ||= guess_dtype_from_mdtype
  from_dtype = MatReader::MDTYPE_TO_DTYPE[self.real_part.tag.data_type]

  # Create the same kind of matrix that MATLAB saved.
  case matlab_class
  when :mxSPARSE
    raise(NotImplementedError, "expected .mat row indices to be of type :miINT32") unless row_index.tag.data_type == :miINT32
    raise(NotImplementedError, "expected .mat column indices to be of type :miINT32") unless column_index.tag.data_type == :miINT32
    #require 'pry'
    #binding.pry

    # MATLAB always uses :miINT32 for indices according to the spec
    ia_ja = repacked_indices
    data_str, repacked_dtype = repacked_data(dtype)
    NMatrix.new(:yale, self.dimensions.reverse, repacked_dtype, \
      ia_ja[0], ia_ja[1], data_str, repacked_dtype)

  else
    # Call regular dense constructor.
    # MATLAB stores column-major, so build transposed and transpose back.
    NMatrix.new(:dense, self.dimensions.reverse, unpacked_data, dtype).transpose
  end
end

# Packable hook: reads one miMATRIX element — array flags, dimensions,
# name, then (for sparse) index sub-elements and the real/imaginary data
# sub-elements, or (for cell arrays) the child elements.
def read_packed(packedio, options)
  # First sub-element: array flags (class, logical/global/complex bits)
  # plus nonzero_max for sparse matrices.
  flags_class, self.nonzero_max = packedio.read([Element, options]).data

  # Low nibble encodes the MATLAB class (mxDOUBLE, mxSPARSE, ...).
  self.matlab_class = MatReader::MCLASSES[flags_class % 16]

  # Flag bits 8-10: logical, global, complex.
  self.logical = (flags_class >> 8) % 2 == 1 ? true : false
  self.global = (flags_class >> 9) % 2 == 1 ? true : false
  self.complex = (flags_class >> 10) % 2 == 1 ? true : false

  dimensions_tag_data = packedio.read([Element, options])
  self.dimensions = dimensions_tag_data.data

  begin
    name_tag_data = packedio.read([Element, options])
    self.matlab_name = name_tag_data.data.is_a?(Array) ? \
      name_tag_data.data.collect { |i| i.chr }.join('') : \
      name_tag_data.data.chr
  rescue ElementDataIOError => e
    STDERR.puts "ERROR: Failure while trying to read Matlab variable name: #{name_tag_data.inspect}"
    STDERR.puts 'Element Tag:'
    STDERR.puts " #{e.tag}"
    STDERR.puts 'Previously, I read these dimensions:'
    STDERR.puts " #{dimensions_tag_data.inspect}"
    STDERR.puts "Unpack options were: #{options.inspect}"
    raise(e)
  end

  if self.matlab_class == :mxCELL
    # Read what may be a series of matrices
    self.cells = []
    STDERR.puts("Warning: Cell array does not yet support reading multiple dimensions") if dimensions.size > 2 || (dimensions[0] > 1 && dimensions[1] > 1)
    number_of_cells = dimensions.inject(1) { |prod,i| prod * i }
    number_of_cells.times { self.cells << \
      packedio.read([Element, options]) }
  else
    # Read the remaining sub-elements raw, to be repacked/unpacked later.
    read_opts = [RawElement, {:bytes => options[:bytes], \
      :endian => :native}]

    if self.matlab_class == :mxSPARSE
      self.column_index = packedio.read(read_opts)
      self.row_index = packedio.read(read_opts)
    end

    self.real_part = packedio.read(read_opts)
    self.imaginary_part = packedio.read(read_opts) if self.complex
  end
end

# Consumes +bytes+ bytes of alignment padding, if any.
def ignore_padding(packedio, bytes)
  packedio.read([Integer, {:unsigned => true, \
    :bytes => bytes}]) if bytes > 0
end
end

# Element unpack table: the base table plus the two container mdtypes.
MDTYPE_UNPACK_ARGS = MatReader::MDTYPE_UNPACK_ARGS.merge({
  :miCOMPRESSED => [Compressed, {}],
  :miMATRIX => [MatrixData, {}]
})

# Offset of the first data element, immediately after the 128-byte header.
FIRST_TAG_FIELD_POS = 128

###################################
# Instance Methods for Mat5Reader #
###################################

# call-seq:
#     NMatrix::IO::Mat5Reader.new(stream, options = {}) -> NMatrix
def initialize(stream, options = {})
  super(stream, options)
  @file_header = seek_and_read_file_header
end

# Collects every top-level element into an Array.
def to_a
  returning(Array.new) do |ary|
    self.each { |el| ary << el }
  end
end

# Converts the whole file to Ruby objects; a single element is unwrapped.
def to_ruby
  ary = self.to_a

  if ary.size == 1
    ary.first.to_ruby
  else
    ary.collect { |item| item.to_ruby }
  end
end

# Reads the 2-byte endian indicator at offset 126: 'IM' means the file
# was written little-endian, 'MI' big-endian.
def guess_byte_order
  stream.seek(Header::BYTE_ORDER_POS)
  mi = stream.read(Header::BYTE_ORDER_LENGTH)
  stream.seek(0)
  mi == 'IM' ? :little : :big
end

# Reads the 128-byte file header from the start of the stream.
def seek_and_read_file_header
  stream.seek(0)
  stream.read(FIRST_TAG_FIELD_POS).unpack(Header, {:endian => byte_order})
end

# Yields the data of each top-level element; compressed elements are
# inflated and their inner elements yielded instead. Rewinds to the first
# element when done.
def each(&block)
  stream.each(Element, {:endian => byte_order}) do |element|
    if element.data.is_a?(Compressed)
      StringIO.new(element.data.content, 'rb').each(Element, \
        {:endian => byte_order}) do |compressed_element|
        yield compressed_element.data
      end
    else
      yield element.data
    end
  end

  # Go back to the beginning in case we want to do it again.
  stream.seek(FIRST_TAG_FIELD_POS)

  self
end

# Internal Classes.

# The 128-byte .mat v5 file header: 116-byte description, subsystem data
# offset, version, and endian indicator.
class Header < Struct.new(:desc, :data_offset, :version, :endian) #:nodoc:
  include Packable

  BYTE_ORDER_LENGTH = 2
  DESC_LENGTH = 116
  DATA_OFFSET_LENGTH = 8
  VERSION_LENGTH = 2
  BYTE_ORDER_POS = 126

  # TODO: TEST WRITE.
  def write_packed(packedio, options)
    packedio << [desc, {:bytes => DESC_LENGTH }] << [data_offset, {:bytes => DATA_OFFSET_LENGTH }] << [version, {:bytes => VERSION_LENGTH }] << [byte_order, {:bytes => BYTE_ORDER_LENGTH }]
  end

  # Packable hook: reads the header fields and returns the detected
  # endianness as a Symbol.
  def read_packed(packedio, options)
    self.desc, self.data_offset, self.version, self.endian = packedio >> [String, {:bytes => DESC_LENGTH }] >> [String, {:bytes => DATA_OFFSET_LENGTH }] >> [Integer, {:bytes => VERSION_LENGTH, :endian => options[:endian] }] >> [String, {:bytes => 2 }]

    self.desc.strip!
    self.data_offset.strip!
    self.data_offset = nil if self.data_offset.empty?

    self.endian == 'IM' ? :little : :big
  end
end

# An 8-byte element tag: mdtype plus byte length, with support for the
# packed "small data element" form where both fit in 4 bytes.
class Tag < Struct.new(:data_type, :raw_data_type, :bytes, :small) #:nodoc:
  include Packable

  DATA_TYPE_OPTS = BYTES_OPTS = {:bytes => 4, :signed => false}
  LENGTH = DATA_TYPE_OPTS[:bytes] + BYTES_OPTS[:bytes]

  # TODO: TEST WRITE.
  def write_packed packedio, options
    packedio << [data_type, DATA_TYPE_OPTS] << [bytes, BYTES_OPTS]
  end

  # True when this tag uses the 4-byte small data element format.
  def small?
    self.bytes > 0 and self.bytes <= 4
  end

  # Size of the tag itself: 4 bytes for small format, 8 otherwise.
  def size
    small? ? 4 : 8
  end

  # Packable hook: decodes the (possibly small-format) tag.
  def read_packed packedio, options
    self.raw_data_type = packedio.read([Integer, \
      DATA_TYPE_OPTS.merge(options)])

    # Borrowed from a SciPy patch
    upper = self.raw_data_type >> 16
    lower = self.raw_data_type & 0xFFFF

    if upper > 0
      # Small data element format
      raise IOError, 'Small data element format indicated, but length is more than 4 bytes!' if upper > 4

      self.bytes = upper
      self.raw_data_type = lower
    else
      self.bytes = packedio.read([Integer, BYTES_OPTS.merge(options)])
    end

    self.data_type = MatReader::MDTYPES[self.raw_data_type]
  end

  def inspect
    "#<#{self.class.to_s} data_type=#{data_type}[#{raw_data_type}][#{raw_data_type.to_s(2)}] bytes=#{bytes} size=#{size}#{small? ? ' small' : ''}>"
  end
end

# IOError carrying the element Tag that was being read when it occurred.
class ElementDataIOError < IOError #:nodoc:
  attr_reader :tag

  def initialize(tag = nil, msg = nil)
    @tag = tag
    super msg
  end

  def to_s
    @tag.inspect + "\n" + super
  end
end

# A fully-unpacked element: tag plus decoded data.
class Element < Struct.new(:tag, :data) #:nodoc:
  include Packable

  def write_packed packedio, options
    packedio << [tag, {}] << [data, {}]
  end

  # Packable hook: reads the tag, then unpacks the payload according to
  # the tag's mdtype, consuming any trailing 8-byte alignment padding.
  def read_packed(packedio, options)
    raise(ArgumentError, 'Missing mandatory option :endian.') \
      unless options.has_key?(:endian)

    tag = packedio.read([Tag, {:endian => options[:endian]}])
    data_type = MDTYPE_UNPACK_ARGS[tag.data_type]

    self.tag = tag

    raise ElementDataIOError.new(tag, "Unrecognized Matlab type #{tag.raw_data_type}") \
      if data_type.nil?

    if tag.bytes == 0
      self.data = []
    else
      # Fixed-size scalar types are read repeatedly; container types once.
      number_of_reads = data_type[1].has_key?(:bytes) ? \
        tag.bytes / data_type[1][:bytes] : 1
      data_type[1].merge!({:endian => options[:endian]})

      if number_of_reads == 1
        self.data = packedio.read(data_type)
      else
        self.data = returning(Array.new) do |ary|
          number_of_reads.times { ary << packedio.read(data_type) }
        end
      end

      begin
        ignore_padding(packedio, (tag.bytes + tag.size) % 8) \
          unless [:miMATRIX, :miCOMPRESSED].include?(tag.data_type)
      rescue EOFError
        STDERR.puts self.tag.inspect
        raise(ElementDataIOError.new(tag, "Ignored too much"))
      end
    end
  end

  # Consumes padding up to the next 8-byte boundary, insisting it is zero.
  def ignore_padding(packedio, bytes)
    if bytes > 0
      #STDERR.puts "Ignored #{8 - bytes} on #{self.tag.data_type}"
      ignored = packedio.read(8 - bytes)
      ignored_unpacked = ignored.unpack("C*")
      raise(IOError, "Nonzero padding detected: #{ignored_unpacked}") \
        if ignored_unpacked.any? { |i| i != 0 }
    end
  end

  def to_ruby
    data.to_ruby
  end
end

# Doesn't unpack the contents of the element, e.g., if we want to handle
# manually, or pass the raw string of bytes into NMatrix.
class RawElement < Element #:nodoc:
  # Packable hook: reads the tag and keeps the payload as a raw String.
  def read_packed(packedio, options)
    raise(ArgumentError, 'Missing mandatory option :endian.') \
      unless options.has_key?(:endian)

    self.tag = packedio.read([Tag, {:endian => options[:endian]}])
    self.data = packedio.read([String, {:endian => options[:endian], \
      :bytes => tag.bytes }])

    begin
      ignore_padding(packedio, (tag.bytes + tag.size) % 8) \
        unless [:miMATRIX, :miCOMPRESSED].include?(tag.data_type)
    rescue EOFError
      STDERR.puts self.tag.inspect
      raise ElementDataIOError.new(tag, 'Ignored too much.')
    end
  end
end

#####################
# End of Mat5Reader #
#####################
end
end

================================================
FILE: lib/nmatrix/io/mat_reader.rb
================================================
#--
# = NMatrix
#
# A linear algebra library for scientific computation in Ruby.
# NMatrix is part of SciRuby.
# # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2016, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2016, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == io/mat_reader.rb # # Base class for .mat file reading (Matlab files). # #++ require 'packable' module NMatrix::IO::Matlab # Class for parsing a .mat file stream. # # The full format of .mat files is available here: # * http://www.mathworks.com/help/pdf_doc/matlab/matfile_format.pdf class MatReader #:nodoc: MDTYPE_UNPACK_ARGS = { :miINT8 => [Integer, {:signed => true, :bytes => 1}], :miUINT8 => [Integer, {:signed => false, :bytes => 1}], :miINT16 => [Integer, {:signed => true, :bytes => 2}], :miUINT16 => [Integer, {:signed => false, :bytes => 2}], :miINT32 => [Integer, {:signed => true, :bytes => 4}], :miUINT32 => [Integer, {:signed => false, :bytes => 4}], :miSINGLE => [Float, {:precision => :single, :bytes => 4, :endian => :native}], :miDOUBLE => [Float, {:precision => :double, :bytes => 4, :endian => :native}], :miINT64 => [Integer, {:signed => true, :bytes => 8}], :miUINT64 => [Integer, {:signed => false, :bytes => 8}] } DTYPE_PACK_ARGS = { :byte => [Integer, {:signed => false, :bytes => 1}], :int8 => [Integer, {:signed => true, :bytes => 1}], :int16 => [Integer, {:signed => true, :bytes => 2}], :int32 => [Integer, {:signed => true, :bytes => 4}], :int64 => [Integer, {:signed => true, :bytes => 8}], :float32 => [Float, {:precision => :single, :bytes => 4, :endian => :native}], :float64 => [Float, {:precision => :double, :bytes => 8, :endian => :native}], :complex64 => [Float, {:precision => :single, :bytes => 4, :endian => 
:native}], #2x :complex128 => [Float, {:precision => :double, :bytes => 8, :endian => :native}] } ITYPE_PACK_ARGS = { :uint8 => [Integer, {:signed => false, :bytes => 1}], :uint16 => [Integer, {:signed => false, :bytes => 2}], :uint32 => [Integer, {:signed => false, :bytes => 4}], :uint64 => [Integer, {:signed => false, :bytes => 8}], } NO_REPACK = [:miINT8, :miUINT8, :miINT16, :miINT32, :miSINGLE, :miDOUBLE, :miINT64] # Convert from MATLAB dtype to NMatrix dtype. MDTYPE_TO_DTYPE = { :miUINT8 => :byte, :miINT8 => :int8, :miINT16 => :int16, :miUINT16 => :int16, :miINT32 => :int32, :miUINT32 => :int32, :miINT64 => :int64, :miUINT64 => :int64, :miSINGLE => :float32, :miDOUBLE => :float64 } MDTYPE_TO_ITYPE = { :miUINT8 => :uint8, :miINT8 => :uint8, :miINT16 => :uint16, :miUINT16 => :uint16, :miINT32 => :uint32, :miUINT32 => :uint32, :miINT64 => :uint64, :miUINT64 => :uint64 } # Before release v7.1 (release 14) matlab (TM) used the system # default character encoding scheme padded out to 16-bits. Release 14 # and later use Unicode. When saving character data, R14 checks if it # can be encoded in 7-bit ascii, and saves in that format if so. MDTYPES = [ nil, :miINT8, :miUINT8, :miINT16, :miUINT16, :miINT32, :miUINT32, :miSINGLE, nil, :miDOUBLE, nil, nil, :miINT64, :miUINT64, :miMATRIX, :miCOMPRESSED, :miUTF8, :miUTF16, :miUTF32 ] MCLASSES = [ nil, :mxCELL, :mxSTRUCT, :mxOBJECT, :mxCHAR, :mxSPARSE, :mxDOUBLE, :mxSINGLE, :mxINT8, :mxUINT8, :mxINT16, :mxUINT16, :mxINT32, :mxUINT32, :mxINT64, :mxUINT64, :mxFUNCTION, :mxOPAQUE, :mxOBJECT_CLASS_FROM_MATRIX_H ] attr_reader :byte_order # call-seq: # new(stream, options = {}) -> MatReader # # * *Raises* : # - +ArgumentError+ -> First argument must be IO. # def initialize(stream, options = {}) raise ArgumentError, 'First arg must be IO.' 
unless stream.is_a?(::IO) @stream = stream @byte_order = options[:byte_order] || guess_byte_order end # call-seq: # guess_byte_order -> Symbol # def guess_byte_order # Assume native, since we don't know what type of file we have. :native end protected attr_reader :stream end end ================================================ FILE: lib/nmatrix/io/point_cloud.rb ================================================ #-- # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2016, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2016, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == io/point_cloud.rb # # Point Cloud Library (PCL) PCD file IO functions. # #++ # Reader for Point Cloud Data (PCD) file format. # # The documentation of this format can be found in: # # http://pointclouds.org/documentation/tutorials/pcd_file_format.php # # Note that this implementation does not take the width or height parameters # into account. module NMatrix::IO::PointCloud # For UINT, just add 1 to the index. INT_DTYPE_BY_SIZE = [:int8, :int8, :int16, :int32, :int64, :int64] #:nodoc: FLOAT_DTYPE_BY_SIZE = {4 => :float32, 8 => :float64} #:nodoc: class << self # call-seq: # load(filename) -> NMatrix # # * *Arguments* : # - +filename+ -> String giving the name of the file to be loaded. # # Load a Point Cloud Library PCD file as a matrix. 
def load(filename)
  MetaReader.new(filename).matrix
end
end

# Reads the PCD header and the ASCII data section of a Point Cloud
# Library file; the resulting NMatrix is exposed via #matrix.
class MetaReader #:nodoc:
  # PCD header entries, in file order.
  ENTRIES = [:version, :fields, :size, :type, :count, :width, :height, :viewpoint, :points, :data]
  # Writer methods paired 1:1 with ENTRIES.
  ASSIGNS = [:version=, :fields=, :size=, :type=, :count=, :width=, :height=, :viewpoint=, :points=, :data=]
  # Conversion applied to each header entry's tokens (parallel to ENTRIES);
  # :downcase_to_sym is handled specially in #read_entry.
  CONVERT = [:to_s, :downcase_to_sym, :to_i, :downcase_to_sym, :to_i, :to_i, :to_i, :to_f, :to_i, :downcase_to_sym]

  # Token-coercion method used for each matrix dtype when reading data rows.
  DTYPE_CONVERT = {:byte => :to_i, :int8 => :to_i, :int16 => :to_i, :int32 => :to_i, :float32 => :to_f, :float64 => :to_f}

  # For UINT, just add 1 to the index.
  INT_DTYPE_BY_SIZE = {1 => :int8, 2 => :int16, 4 => :int32, 8 => :int64, 16 => :int64}
  FLOAT_DTYPE_BY_SIZE = {1 => :float32, 2 => :float32, 4 => :float32, 8 => :float64,16 => :float64}

  class << self
    # Given a type (:f for float, :u for unsigned, anything else signed)
    # and a number of bytes, figure out an appropriate dtype.
    def dtype_by_type_and_size t, s
      if t == :f
        FLOAT_DTYPE_BY_SIZE[s]
      elsif t == :u
        # One unsigned byte fits in :byte; wider unsigned types map to a
        # signed type twice as wide so the full unsigned range still fits.
        return :byte if s == 1
        INT_DTYPE_BY_SIZE[s*2]
      else
        INT_DTYPE_BY_SIZE[s]
      end
    end
  end

  # call-seq:
  #     PointCloudReader::MetaReader.new(filename) -> MetaReader
  #
  # * *Arguments* :
  #   - +filename+ -> String giving the name of the file to be loaded.
  # * *Raises* :
  #   - +NotImplementedError+ -> only ASCII supported currently
  #   - +IOError+ -> premature end of file
  #
  # Open a file and read the metadata at the top; then read the PCD into an
  # NMatrix.
  #
  # In addition to the fields in the PCD file, there will be at least one
  # additional attribute, :matrix, storing the data.
  def initialize filename
    f = File.new(filename, "r")

    # Read the header entries in order, assigning each via its setter.
    ENTRIES.each.with_index do |entry,i|
      read_entry(f, entry, ASSIGNS[i], CONVERT[i])
    end

    raise(NotImplementedError, "only ASCII supported currently") \
     unless self.data.first == :ascii

    @matrix = NMatrix.new(self.shape, dtype: self.dtype)

    # Do we want to use to_i or to_f?
    convert = DTYPE_CONVERT[self.dtype]

    # Each remaining line holds one point: split on whitespace, coerce,
    # and write it as row i of the matrix.
    # NOTE(review): the block parameter |f| shadows the File handle f above.
    i = 0
    while line = f.gets
      @matrix[i,:*] = line.chomp.split.map { |f| f.send(convert) }
      i += 1
    end

    raise(IOError, "premature end of file") if i < self.points[0]
  end

  attr_accessor *ENTRIES
  attr_reader :matrix

  protected

  # Read the current entry of the header: consume lines until one with
  # data is found, assign its converted values via +assign+, and return
  # the stored value via the entry's reader.
  def read_entry f, entry, assign=nil, convert=nil
    assign ||= (entry.to_s + "=").to_sym

    while line = f.gets
      next if line =~ /^\s*#/ # ignore comment lines
      line = line.chomp.split(/\s*#/)[0] # ignore the comments after any data

      # Split, remove the entry name, and convert to the correct type.
      self.send(assign, line.split.tap { |t| t.shift }.map do |f|
        if convert.nil?
          f
        elsif convert == :downcase_to_sym
          f.downcase.to_sym
        else
          f.send(convert)
        end
      end)

      # We don't really want to loop.
      break
    end

    self.send(entry)
  end

  # Determine the dtype for a matrix based on the types and
  # sizes given in the PCD.
  # Call this only after read_entry has been called.
  def dtype
    @dtype ||= begin
      dtypes = self.type.map.with_index do |t,k|
        MetaReader.dtype_by_type_and_size(t, size[k])
      end.sort.uniq

      # Upcast pairwise until a single common dtype remains.
      # This could probably save one comparison at most, but we assume that
      # worst case isn't going to happen very often.
      while dtypes.size > 1
        d = NMatrix.upcast(dtypes[0], dtypes[1])
        dtypes.shift
        dtypes[0] = d
      end

      dtypes[0]
    end
  end

  # Determine the shape of the matrix.
def shape
  # One row per point, one column per declared field. Memoized.
  @shape ||= [ self.points[0], self.fields.size ]
end
end
end

================================================
FILE: lib/nmatrix/jruby/decomposition.rb
================================================

class NMatrix
  # discussion in https://github.com/SciRuby/nmatrix/issues/374
  #
  # Solve AX = B for a multi-column right-hand side by solving each
  # column independently via #solve and packing the column solutions
  # into a single matrix. Falls back to #solve directly when B has a
  # single column.
  def matrix_solve rhs
    if rhs.shape[1] > 1
      nmatrix = NMatrix.new :copy
      nmatrix.shape = rhs.shape
      res = []
      #Solve a matrix and store the vectors in a matrix
      (0...rhs.shape[1]).each do |i|
        res << self.solve(rhs.col(i)).s.toArray.to_a
      end
      #res is in col major format
      result = ArrayGenerator.getArrayColMajorDouble res.to_java :double, rhs.shape[0], rhs.shape[1]
      nmatrix.s = ArrayRealVector.new result
      return nmatrix
    else
      return self.solve rhs
    end
  end
end

================================================
FILE: lib/nmatrix/jruby/enumerable.rb
================================================

# Source: https://github.com/marcandre/backports/blob/master/lib/backports/rails/enumerable.rb
module Enumerable
  # Standard in rails... See official documentation[http://api.rubyonrails.org/classes/Enumerable.html]
  # Modified from rails 2.3 to not rely on size
  #
  # Sums the elements (or the block's results); +identity+ is returned
  # for an empty collection. Only defined when the runtime does not
  # already provide Enumerable#sum (Ruby >= 2.4 does).
  def sum(identity = 0, &block)
    if block_given?
      map(&block).sum(identity)
    else
      inject { |sum, element| sum + element } || identity
    end
  end unless method_defined? :sum
end

================================================
FILE: lib/nmatrix/jruby/error.rb
================================================

# Exception classes used by the JRuby backend, mirroring those the C
# extension defines at the top level.
DataTypeError = Class.new(StandardError)
StorageTypeError = Class.new(StandardError)
ShapeError = Class.new(StandardError)
NotInvertibleError = Class.new(StandardError)

================================================
FILE: lib/nmatrix/jruby/math.rb
================================================

#--
# = NMatrix
#
# A linear algebra library for scientific computation in Ruby.
# NMatrix is part of SciRuby.
# # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == math.rb # # Math functionality for NMatrix, along with any NMatrix instance # methods that correspond to ATLAS/BLAS/LAPACK functions (e.g., # laswp). #++ class NMatrix # # call-seq: # getrf! -> Array # # LU factorization of a general M-by-N matrix +A+ using partial pivoting with # row interchanges. The LU factorization is A = PLU, where P is a row permutation # matrix, L is a lower triangular matrix with unit diagonals, and U is an upper # triangular matrix (note that this convention is different from the # clapack_getrf behavior, but matches the standard LAPACK getrf). # +A+ is overwritten with the elements of L and U (the unit # diagonal elements of L are not saved). P is not returned directly and must be # constructed from the pivot array ipiv. The row indices in ipiv are indexed # starting from 1. # Only works for dense matrices. # # * *Returns* : # - The IPIV vector. The L and U matrices are stored in A. # * *Raises* : # - +StorageTypeError+ -> ATLAS functions only work on dense matrices. # def getrf! ipiv = LUDecomposition.new(self.twoDMat).getPivot.to_a return ipiv end # # call-seq: # geqrf! -> shape.min x 1 NMatrix # # QR factorization of a general M-by-N matrix +A+. # # The QR factorization is A = QR, where Q is orthogonal and R is Upper Triangular # +A+ is overwritten with the elements of R and Q with Q being represented by the # elements below A's diagonal and an array of scalar factors in the output NMatrix. 
# # The matrix Q is represented as a product of elementary reflectors # Q = H(1) H(2) . . . H(k), where k = min(m,n). # # Each H(i) has the form # # H(i) = I - tau * v * v' # # http://www.netlib.org/lapack/explore-html/d3/d69/dgeqrf_8f.html # # Only works for dense matrices. # # * *Returns* : # - Vector TAU. Q and R are stored in A. Q is represented by TAU and A # * *Raises* : # - +StorageTypeError+ -> LAPACK functions only work on dense matrices. # def geqrf! # The real implementation is in lib/nmatrix/lapacke.rb raise(NotImplementedError, "geqrf! requires the nmatrix-lapacke gem") end # # call-seq: # ormqr(tau) -> NMatrix # ormqr(tau, side, transpose, c) -> NMatrix # # Returns the product Q * c or c * Q after a call to geqrf! used in QR factorization. # +c+ is overwritten with the elements of the result NMatrix if supplied. Q is the orthogonal matrix # represented by tau and the calling NMatrix # # Only works on float types, use unmqr for complex types. # # == Arguments # # * +tau+ - vector containing scalar factors of elementary reflectors # * +side+ - direction of multiplication [:left, :right] # * +transpose+ - apply Q with or without transpose [false, :transpose] # * +c+ - NMatrix multplication argument that is overwritten, no argument assumes c = identity # # * *Returns* : # # - Q * c or c * Q Where Q may be transposed before multiplication. # # # * *Raises* : # - +StorageTypeError+ -> LAPACK functions only work on dense matrices. # - +TypeError+ -> Works only on floating point matrices, use unmqr for complex types # - +TypeError+ -> c must have the same dtype as the calling NMatrix # def ormqr(tau, side=:left, transpose=false, c=nil) # The real implementation is in lib/nmatrix/lapacke.rb raise(NotImplementedError, "ormqr requires the nmatrix-lapacke gem") end # # call-seq: # unmqr(tau) -> NMatrix # unmqr(tau, side, transpose, c) -> NMatrix # # Returns the product Q * c or c * Q after a call to geqrf! used in QR factorization. 
# +c+ is overwritten with the elements of the result NMatrix if it is supplied. Q is the orthogonal matrix # represented by tau and the calling NMatrix # # Only works on complex types, use ormqr for float types. # # == Arguments # # * +tau+ - vector containing scalar factors of elementary reflectors # * +side+ - direction of multiplication [:left, :right] # * +transpose+ - apply Q as Q or its complex conjugate [false, :complex_conjugate] # * +c+ - NMatrix multplication argument that is overwritten, no argument assumes c = identity # # * *Returns* : # # - Q * c or c * Q Where Q may be transformed to its complex conjugate before multiplication. # # # * *Raises* : # - +StorageTypeError+ -> LAPACK functions only work on dense matrices. # - +TypeError+ -> Works only on floating point matrices, use unmqr for complex types # - +TypeError+ -> c must have the same dtype as the calling NMatrix # def unmqr(tau, side=:left, transpose=false, c=nil) # The real implementation is in lib/nmatrix/lapacke.rb raise(NotImplementedError, "unmqr requires the nmatrix-lapacke gem") end # # call-seq: # potrf!(upper_or_lower) -> NMatrix # # Cholesky factorization of a symmetric positive-definite matrix -- or, if complex, # a Hermitian positive-definite matrix +A+. # The result will be written in either the upper or lower triangular portion of the # matrix, depending on whether the argument is +:upper+ or +:lower+. # Also the function only reads in the upper or lower part of the matrix, # so it doesn't actually have to be symmetric/Hermitian. # However, if the matrix (i.e. the symmetric matrix implied by the lower/upper # half) is not positive-definite, the function will return nonsense. # # This functions requires either the nmatrix-atlas or nmatrix-lapacke gem # installed. # # * *Returns* : # the triangular portion specified by the parameter # * *Raises* : # - +StorageTypeError+ -> ATLAS functions only work on dense matrices. # - +ShapeError+ -> Must be square. 
# - +NotImplementedError+ -> If called without nmatrix-atlas or nmatrix-lapacke gem # def potrf!(which) # The real implementation is in the plugin files. cholesky = CholeskyDecomposition.new(self.twoDMat) if which == :upper u = create_dummy_nmatrix twoDMat = cholesky.getLT u.s = ArrayRealVector.new(ArrayGenerator.getArrayDouble(twoDMat.getData, @shape[0], @shape[1])) return u else l = create_dummy_nmatrix twoDMat = cholesky.getL l.s = ArrayRealVector.new(ArrayGenerator.getArrayDouble(twoDMat.getData, @shape[0], @shape[1])) return l end end def potrf_upper! potrf! :upper end def potrf_lower! potrf! :lower end # # call-seq: # factorize_cholesky -> [upper NMatrix, lower NMatrix] # # Calculates the Cholesky factorization of a matrix and returns the # upper and lower matrices such that A=LU and L=U*, where * is # either the transpose or conjugate transpose. # # Unlike potrf!, this makes method requires that the original is matrix is # symmetric or Hermitian. However, it is still your responsibility to make # sure it is positive-definite. def factorize_cholesky # raise "Matrix must be symmetric/Hermitian for Cholesky factorization" unless self.hermitian? cholesky = CholeskyDecomposition.new(self.twoDMat) l = create_dummy_nmatrix twoDMat = cholesky.getL l.s = ArrayRealVector.new(ArrayGenerator.getArrayDouble(twoDMat.getData, @shape[0], @shape[1])) u = create_dummy_nmatrix twoDMat = cholesky.getLT u.s = ArrayRealVector.new(ArrayGenerator.getArrayDouble(twoDMat.getData, @shape[0], @shape[1])) return [u,l] end # # call-seq: # factorize_lu -> ... # # LU factorization of a matrix. Optionally return the permutation matrix. # Note that computing the permutation matrix will introduce a slight memory # and time overhead. # # == Arguments # # +with_permutation_matrix+ - If set to *true* will return the permutation # matrix alongwith the LU factorization as a second return value. 
# def factorize_lu with_permutation_matrix=nil raise(NotImplementedError, "only implemented for dense storage") unless self.stype == :dense raise(NotImplementedError, "matrix is not 2-dimensional") unless self.dimensions == 2 t = self.clone pivot = create_dummy_nmatrix twoDMat = LUDecomposition.new(self.twoDMat).getP pivot.s = ArrayRealVector.new(ArrayGenerator.getArrayDouble(twoDMat.getData, @shape[0], @shape[1])) return [t,pivot] end # # call-seq: # factorize_qr -> [Q,R] # # QR factorization of a matrix without column pivoting. # Q is orthogonal and R is upper triangular if input is square or upper trapezoidal if # input is rectangular. # # Only works for dense matrices. # # * *Returns* : # - Array containing Q and R matrices # # * *Raises* : # - +StorageTypeError+ -> only implemented for desnse storage. # - +ShapeError+ -> Input must be a 2-dimensional matrix to have a QR decomposition. # def factorize_qr raise(NotImplementedError, "only implemented for dense storage") unless self.stype == :dense raise(ShapeError, "Input must be a 2-dimensional matrix to have a QR decomposition") unless self.dim == 2 qrdecomp = QRDecomposition.new(self.twoDMat) qmat = create_dummy_nmatrix qtwoDMat = qrdecomp.getQ qmat.s = ArrayRealVector.new(ArrayGenerator.getArrayDouble(qtwoDMat.getData, @shape[0], @shape[1])) rmat = create_dummy_nmatrix rtwoDMat = qrdecomp.getR rmat.s = ArrayRealVector.new(ArrayGenerator.getArrayDouble(rtwoDMat.getData, @shape[0], @shape[1])) return [qmat,rmat] end # Solve the matrix equation AX = B, where A is +self+, B is the first # argument, and X is returned. A must be a nxn square matrix, while B must be # nxm. Only works with dense matrices and non-integer, non-object data types. # # == Arguments # # * +b+ - the right hand side # # == Options # # * +form+ - Signifies the form of the matrix A in the linear system AX=B. # If not set then it defaults to +:general+, which uses an LU solver. 
#   Other possible values are +:lower_tri+, +:upper_tri+ and +:pos_def+ (alternatively,
#   non-abbreviated symbols +:lower_triangular+, +:upper_triangular+,
#   and +:positive_definite+ can be used).
#   If +:lower_tri+ or +:upper_tri+ is set, then a specialized linear solver for linear
#   systems AX=B with a lower or upper triangular matrix A is used. If +:pos_def+ is chosen,
#   then the linear system is solved via the Cholesky factorization.
#   Note that when +:lower_tri+ or +:upper_tri+ is used, then the algorithm just assumes that
#   all entries in the lower/upper triangle of the matrix are zeros without checking (which
#   can be useful in certain applications).
#
# == Usage
#
#   a = NMatrix.new [2,2], [3,1,1,2], dtype: dtype
#   b = NMatrix.new [2,1], [9,8], dtype: dtype
#   a.solve(b)
#
#   # solve an upper triangular linear system more efficiently:
#   require 'benchmark'
#   require 'nmatrix/lapacke'
#   rand_mat = NMatrix.random([10000, 10000], dtype: :float64)
#   a = rand_mat.triu
#   b = NMatrix.random([10000, 10], dtype: :float64)
#   Benchmark.bm(10) do |bm|
#     bm.report('general') { a.solve(b) }
#     bm.report('upper_tri') { a.solve(b, form: :upper_tri) }
#   end
#   #                  user     system      total        real
#   # general     73.170000   0.670000  73.840000 ( 73.810086)
#   # upper_tri    0.180000   0.000000   0.180000 (  0.182491)
#
def solve(b, opts = {})
  raise(ShapeError, "Must be called on square matrix") unless self.dim == 2 && self.shape[0] == self.shape[1]
  raise(ShapeError, "number of rows of b must equal number of cols of self") if self.shape[1] != b.shape[0]
  raise(ArgumentError, "only works with dense matrices") if self.stype != :dense
  raise(ArgumentError, "only works for non-integer, non-object dtypes") if integer_dtype? or object_dtype? or b.integer_dtype? or b.object_dtype?

  opts = { form: :general }.merge(opts)
  # NOTE(review): x, n and nrhs are computed but never used below.
  x = b.clone
  n = self.shape[0]
  nrhs = b.shape[1]

  nmatrix = create_dummy_nmatrix
  case opts[:form]
  when :general, :upper_tri, :upper_triangular, :lower_tri, :lower_triangular
    #LU solver
    # NOTE(review): on this JRuby backend, the triangular forms fall
    # through to the general LU solver — no specialized triangular solve
    # is performed, unlike what the documentation above promises.
    solver = LUDecomposition.new(self.twoDMat).getSolver
    nmatrix.s = solver.solve(b.s)
    return nmatrix
  when :pos_def, :positive_definite
    solver = CholeskyDecomposition.new(self.twoDMat).getSolver
    nmatrix.s = solver.solve(b.s)
    return nmatrix
  else
    raise(ArgumentError, "#{opts[:form]} is not a valid form option")
  end
end

#
# call-seq:
#     det -> determinant
#
# Calculate the determinant by way of LU decomposition. There is a
# risk of underflow/overflow.
#
# There are probably also more efficient ways to calculate the determinant.
#
# For smaller matrices, you may be able to use +#det_exact+.
#
# This function is guaranteed to return the same type of data in the matrix
# upon which it is called.
#
# * *Returns* :
#   - The determinant of the matrix. It's the same type as the matrix's dtype.
# * *Raises* :
#   - +ShapeError+ -> Must be used on square matrices.
#
def det
  raise(ShapeError, "determinant can be calculated only for square matrices") unless self.dim == 2 && self.shape[0] == self.shape[1]

  # On JRuby this delegates to det_exact2 (Commons Math LU determinant)
  # rather than clapack_getrf.
  self.det_exact2
end

#
# call-seq:
#     complex_conjugate -> NMatrix
#     complex_conjugate(new_stype) -> NMatrix
#
# Get the complex conjugate of this matrix. See also complex_conjugate! for
# an in-place operation (provided the dtype is already +:complex64+ or
# +:complex128+).
#
# Doesn't work on list matrices, but you can optionally pass in the stype you
# want to cast to if you're dealing with a list matrix.
#
# * *Arguments* :
#   - +new_stype+ -> stype for the new matrix.
# * *Returns* :
#   - If the original NMatrix isn't complex, the result is a +:complex128+ NMatrix. Otherwise, it's the original dtype.
#
def complex_conjugate(new_stype = self.stype)
  self.cast(new_stype, NMatrix::upcast(dtype, :complex64)).complex_conjugate!
end

#
# call-seq:
#     conjugate_transpose -> NMatrix
#
# Calculate the conjugate transpose of a matrix. If your dtype is already
# complex, this should only require one copy (for the transpose).
#
# * *Returns* :
#   - The conjugate transpose of the matrix as a copy.
#
def conjugate_transpose
  self.transpose.complex_conjugate!
end

#
# call-seq:
#     absolute_sum -> Numeric
#
# == Arguments
#   - +incx+ -> the skip size (defaults to 1, no skip)
#   - +n+ -> the number of elements to include
#
# Return the sum of the contents of the vector. This is the BLAS asum routine.
def asum incx=1, n=nil
  if self.shape == [1]
    # Single-element "vector": |x| for real dtypes, |re| + |im| for complex.
    return self[0].abs unless self.complex_dtype?
    return self[0].real.abs + self[0].imag.abs
  end
  return method_missing(:asum, incx, n) unless vector?
  NMatrix::BLAS::asum(self, incx, self.size / incx)
end
alias :absolute_sum :asum

#
# call-seq:
#     norm2 -> Numeric
#
# == Arguments
#   - +incx+ -> the skip size (defaults to 1, no skip)
#   - +n+ -> the number of elements to include
#
# Return the 2-norm of the vector. This is the BLAS nrm2 routine.
#
# NOTE(review): this JRuby implementation returns the Frobenius norm of
# the whole matrix and ignores +incx+ and +n+ — confirm this matches the
# vector contract documented above.
def nrm2 incx=1, n=nil
  self.twoDMat.getFrobeniusNorm()
end
alias :norm2 :nrm2

#
# call-seq:
#     scale! -> NMatrix
#
# == Arguments
#   - +alpha+ -> Scalar value used in the operation.
#   - +inc+ -> Increment used in the scaling function. Should generally be 1.
#   - +n+ -> Number of elements of +vector+.
#
# This is a destructive method, modifying the source NMatrix. See also #scale.
# Return the scaling result of the matrix. BLAS scal will be invoked if provided.
def scale!(alpha, incx=1, n=nil)
  #FIXME
  # raise(DataTypeError, "Incompatible data type for the scaling factor") unless
  #     NMatrix::upcast(self.dtype, NMatrix::min_dtype(alpha)) == self.dtype
  # NOTE(review): only :int8 is rejected here, while #scale rejects all
  # integer dtypes (:byte, :int8, :int16, :int32, :int64) — the two
  # checks look inconsistent; verify intent.
  raise(DataTypeError, "Incompatible data type for the scaling factor") if self.dtype == :int8
  # Multiply every stored element by alpha in place.
  @s.mapMultiplyToSelf(alpha)
  return self
end

#
# call-seq:
#     scale -> NMatrix
#
# == Arguments
#   - +alpha+ -> Scalar value used in the operation.
#   - +inc+ -> Increment used in the scaling function. Should generally be 1.
#   - +n+ -> Number of elements of +vector+.
#
# Return the scaling result of the matrix. BLAS scal will be invoked if provided.
def scale(alpha, incx=1, n=nil)
  # FIXME
  # raise(DataTypeError, "Incompatible data type for the scaling factor") unless
  #     NMatrix::upcast(self.dtype, NMatrix::min_dtype(alpha)) == self.dtype
  raise(DataTypeError, "Incompatible data type for the scaling factor") if self.dtype == :byte || self.dtype == :int8 || self.dtype == :int16 || self.dtype == :int32 || self.dtype == :int64
  nmatrix = NMatrix.new :copy
  nmatrix.shape = @shape.clone
  # Scale a deep copy of the backing vector so self is left untouched.
  nmatrix.s = ArrayRealVector.new(@s.toArray.clone).mapMultiplyToSelf(alpha)
  return nmatrix
end
end

================================================
FILE: lib/nmatrix/jruby/nmatrix_java.rb
================================================

require 'java'

# Pull in the bundled Apache Commons Math jar and the project's own Java
# helper classes.
require_relative '../../../ext/nmatrix_java/vendor/commons-math3-3.6.1.jar'
require_relative '../../../ext/nmatrix_java/target/nmatrix.jar'

java_import 'org.apache.commons.math3.linear.ArrayRealVector'
java_import 'org.apache.commons.math3.linear.RealMatrix'
java_import 'org.apache.commons.math3.linear.MatrixUtils'
java_import 'org.apache.commons.math3.linear.DecompositionSolver'
java_import 'org.apache.commons.math3.linear.LUDecomposition'
java_import 'org.apache.commons.math3.linear.QRDecomposition'
java_import 'org.apache.commons.math3.linear.CholeskyDecomposition'
java_import 'MatrixGenerator'
java_import 'ArrayGenerator'
java_import 'MathHelper'
java_import
'ArrayComparator' class NMatrix include_package 'org.apache.commons.math3.analysis.function' attr_accessor :shape, :dim, :dtype, :stype, :s def initialize(*args) if args[-1] == :copy @shape = [2,2] @s = [0,0,0,0] @dim = shape.is_a?(Array) ? shape.length : 2 else if (args.length <= 3) @shape = args[0] if args[1].is_a?(Array) elements = args[1] if args.length > 2 hash = args[2] # puts hash @dtype = hash[:dtype] @stype = hash[:stype] else @dtype = :float64 @stype = :dense end else # elements = Java::double[shape[0]*shape[1]].new{ Java::Double.NaN } if args.length > 1 if args[1].is_a?(Symbol) hash = args[1] @dtype = hash[:dtype] @stype = hash[:stype] elements = Array.new(shape*shape) unless shape.is_a? Array else elements = Array.new(shape*shape) unless shape.is_a? Array end end end else offset = 0 if (!args[0].is_a?(Symbol) && !args[0].is_a?(String)) @stype = :dense else offset = 1 @stype = :dense @dtype = args[-1] end @shape = args[offset] elements = args[offset+1] end @shape = [shape,shape] unless shape.is_a?(Array) # @dtype = interpret_dtype(argc-1-offset, argv+offset+1, stype); # @dtype = args[:dtype] if args[:dtype] @dtype_sym = nil @stype_sym = nil @default_val_num = nil @capacity_num = nil @size = (0...@shape.size).inject(1) { |x,i| x * @shape[i] } j=0 if (elements.is_a?(ArrayRealVector)) @s = elements # elsif elements.java_class.to_s == "[D" # @s = ArrayRealVector.new(elements) else storage = Array.new(size) elements = [elements,elements] unless elements.is_a?(Array) if size > elements.length (0...size).each do |i| j=0 unless j!=elements.length storage[i] = elements[j] j+=1 end else storage = elements end if @dtype == :object @s = storage else @s = ArrayRealVector.new(storage.to_java Java::double) end end @dim = @shape.is_a?(Array) ? 
@shape.length : 2 end end # Needs to be properly implemented def self.min_dtype(alpha) :int8 end def self.upcast(alpha, beta) false end def clone result = create_dummy_nmatrix # ArrayRealVector#clone is disable, hence use copy # that returns a deep copy of the object. result.s = @s.copy return result end def entries return @s.toArray.to_a end def twoDMat return MatrixUtils.createRealMatrix MatrixGenerator.getMatrixDouble(self.s.toArray, @shape[0], @shape[1]) end def dtype return @dtype end #FIXME def self.guess_dtype arg :float32 end def stype @stype = :dense end def cast_full *args if args.is_a? Hash self.dtype = args[:dtype] else self.dtype = args[1] end return self end def default_value return nil end def __list_default_value__ #not implemented currently end def __yale_default_value__ #not implemented currently end def [] *args return xslice(args) end def slice(*args) return xslice(args) end def []=(*args) to_return = nil if args.length > @dim+1 raise(ArgumentError, "wrong number of arguments (#{args.length} for #{effective_dim(dim+1)})" ) else slice = get_slice(@dim, args, @shape) dense_storage_set(slice, args[-1]) to_return = args[-1] end return to_return end def is_ref? end # def dim # shape.is_a?(Array) ? shape.length : 2 # end alias :dimensions :dim def effective_dim(s) d = 0 (0...@dim).each do |i| d+=1 unless @shape[i] == 1 end return d end alias :effective_dimensions :effective_dim protected def create_dummy_nmatrix nmatrix = NMatrix.new(:copy) nmatrix.shape = self.shape nmatrix.dim = self.dim nmatrix.dtype = self.dtype nmatrix.stype = self.stype return nmatrix end def __list_to_hash__ end public def shape @shape end def supershape s if (s[:src] == @s) return shape # easy case (not a slice) else @s = s[:src] end new_shape = Array.new(dim) (0...dim).each do |index| new_shape[index] = shape[index] end return new_shape end def offset # ArrayRealVector takes care of the offset value when indexing a Vector. # Hence, return 0. 
0 end def det_exact # if (:stype != :dense) # raise Exception.new("can only calculate exact determinant for dense matrices") # return nil # end raise(DataTypeError, "cannot call det_exact on unsigned type") if(self.dtype == :byte) if (@dim != 2 || @shape[0] != @shape[1]) raise(ShapeError, "matrices must be square to have a determinant defined") return nil end to_return = nil if (dtype == :object) # to_return = *reinterpret_cast(result); else to_return = LUDecomposition.new(self.twoDMat).getDeterminant() end return to_return.round(3) end def det_exact2 if (@dim != 2 || @shape[0] != @shape[1]) raise(ShapeError, "matrices must be square to have a determinant defined") return nil end to_return = nil if (dtype == :object) # to_return = *reinterpret_cast(result); else to_return = LUDecomposition.new(self.twoDMat).getDeterminant() end return to_return.round(3) end def complex_conjugate! end protected def count_max_elements return size end def reshape_bang arg if(@stype == :dense) shape_ary = arg size = count_max_elements new_size = 1 shape = interpret_shape(shape_ary, dim) (0...dim).each do |index| new_size *= shape[index] end if (size == new_size) self.shape = shape self.dim = dim return self else raise(ArgumentError, "reshape cannot resize; size of new and old matrices must match") end else raise(NotImplementedError, "reshape in place only for dense stype") end end def interpret_shape(shape_ary, dim) shape = [] if shape_ary.is_a?(Array) dim = shape_ary.length (0...dim).each do |index| shape[index] = shape_ary[index].to_i end elsif shape_ary.is_a?(FIXNUM) dim = 2 shape = Array.new(dim) shape[0] = shape_ary.to_i shape[1] = shape_ary.to_i else raise(ArgumentError, "Expected an array of numbers or a single Fixnum for matrix shape") end return shape end public def each_with_indices nmatrix = create_dummy_nmatrix stride = get_stride(self) offset = 0 #Create indices and initialize them to zero coords = Array.new(dim){ 0 } shape_copy = Array.new(dim) (0...size).each do |k| 
dense_storage_coords(nmatrix, k, coords, stride, offset) slice_index = dense_storage_pos(coords,stride) ary = Array.new if (@dtype == :object) ary << self.s[slice_index] else ary << self.s.toArray.to_a[slice_index] end (0...dim).each do |p| ary << coords[p] end # yield the array which now consists of the value and the indices yield(ary) end if block_given? return nmatrix end def each_stored_with_indices nmatrix = create_dummy_nmatrix stride = get_stride(self) offset = 0 #Create indices and initialize them to zero coords = Array.new(dim){ 0 } shape_copy = Array.new(dim) (0...size).each do |k| dense_storage_coords(nmatrix, k, coords, stride, offset) slice_index = dense_storage_pos(coords,stride) ary = Array.new if (@dtype == :object) ary << self.s[slice_index] else ary << self.s.toArray.to_a[slice_index] end (0...dim).each do |p| ary << coords[p] end # yield the array which now consists of the value and the indices yield(ary) end if block_given? return nmatrix end def map_stored end def each_ordered_stored_with_indices end protected def __dense_each__ nmatrix = create_dummy_nmatrix stride = get_stride(self) offset = 0 #Create indices and initialize them to zero coords = Array.new(dim){ 0 } shape_copy = Array.new(dim) (0...size).each do |k| if (@dtype == :object) dense_storage_coords(nmatrix, k, coords, stride, offset) slice_index = dense_storage_pos(coords,stride) yield self.s[slice_index] else dense_storage_coords(nmatrix, k, coords, stride, offset) slice_index = dense_storage_pos(coords,stride) yield self.s.toArray.to_a[slice_index] end end if block_given? 
if (@dtype == :object) return @s.to_enum else return @s.toArray().to_a.to_enum end end def __dense_map__ nmatrix = create_dummy_nmatrix stride = get_stride(self) offset = 0 coords = Array.new(dim){ 0 } shape_copy = Array.new(dim) s= Java::double[size].new (0...size).each do |k| dense_storage_coords(nmatrix, k, coords, stride, offset) slice_index = dense_storage_pos(coords,stride) y = yield @s.getEntry(slice_index) @s.setEntry(slice_index, y) end nmatrix.s = ArrayRealVector.new s return nmatrix end def __dense_map_pair__ end def __list_map_merged_stored__ end def __list_map_stored__ end def __yale_map_merged_stored__ end def __yale_map_stored__ end def __yale_stored_diagonal_each_with_indices__ end def __yale_stored_nondiagonal_each_with_indices__ end public def ==(otherNmatrix) result = false if (otherNmatrix.is_a?(NMatrix)) #check dimension if (@dim != otherNmatrix.dim) raise(ShapeError, "cannot compare matrices with different dimension") end #check shape (0...dim).each do |i| if (@shape[i] != otherNmatrix.shape[i]) raise(ShapeError, "cannot compare matrices with different shapes"); end end #check the entries if dtype == :object result = @s == otherNmatrix.s else result = ArrayComparator.equals(@s.toArray, otherNmatrix.s.toArray) end end result end def =~ (other) lha = @s.toArray.to_a rha = other.s.toArray.to_a resultArray = Array.new(lha.length) if (other.is_a?(NMatrix)) #check dimension if (@dim != other.dim) raise(ShapeError, "cannot compare matrices with different dimension") return nil end #check shape (0...dim).each do |i| if (@shape[i] != other.shape[i]) raise(ShapeError, "cannot compare matrices with different shapes"); return nil end end #check the entries (0...lha.length).each do |i| resultArray[i] = lha[i] == rha[i] ? 
true : false end result = NMatrix.new(:copy) result.shape = @shape result.dtype = :object result.s = resultArray end result end def !~ (other) lha = @s.toArray.to_a rha = other.s.toArray.to_a resultArray = Array.new(lha.length) if (other.is_a?(NMatrix)) #check dimension if (@dim != other.dim) raise(ShapeError, "cannot compare matrices with different dimension") return nil end #check shape (0...dim).each do |i| if (@shape[i] != other.shape[i]) raise(ShapeError, "cannot compare matrices with different shapes"); return nil end end #check the entries (0...lha.length).each do |i| resultArray[i] = lha[i] != rha[i] ? true : false end result = NMatrix.new(:copy) result.shape = @shape result.dtype = :object result.s = resultArray end result end def <= (other) lha = @s.toArray.to_a rha = other.s.toArray.to_a resultArray = Array.new(lha.length) if (other.is_a?(NMatrix)) #check dimension if (@dim != other.dim) raise(ShapeError, "cannot compare matrices with different dimension") return nil end #check shape (0...dim).each do |i| if (@shape[i] != other.shape[i]) raise(ShapeError, "cannot compare matrices with different shapes"); return nil end end #check the entries (0...lha.length).each do |i| resultArray[i] = lha[i] <= rha[i] ? true : false end result = NMatrix.new(:copy) result.shape = @shape result.dtype = :object result.s = resultArray end result end def >= (other) lha = @s.toArray.to_a rha = other.s.toArray.to_a resultArray = Array.new(lha.length) if (other.is_a?(NMatrix)) #check dimension if (@dim != other.dim) raise(ShapeError, "cannot compare matrices with different dimension") return nil end #check shape (0...dim).each do |i| if (@shape[i] != other.shape[i]) raise(ShapeError, "cannot compare matrices with different shapes"); return nil end end #check the entries (0...lha.length).each do |i| resultArray[i] = lha[i] >= rha[i] ? 
true : false end result = NMatrix.new(:copy) result.shape = @shape result.dtype = :object result.s = resultArray end result end def < (other) lha = @s.toArray.to_a rha = other.s.toArray.to_a resultArray = Array.new(lha.length) if (other.is_a?(NMatrix)) #check dimension if (@dim != other.dim) raise(ShapeError, "cannot compare matrices with different dimension") return nil end #check shape (0...dim).each do |i| if (@shape[i] != other.shape[i]) raise(ShapeError, "cannot compare matrices with different shapes"); return nil end end #check the entries (0...lha.length).each do |i| resultArray[i] = lha[i] < rha[i] ? true : false end result = NMatrix.new(:copy) result.shape = @shape result.dtype = :object result.s = resultArray end result end def > (other) lha = @s.toArray.to_a rha = other.s.toArray.to_a resultArray = Array.new(lha.length) if (other.is_a?(NMatrix)) #check dimension if (@dim != other.dim) raise(ShapeError, "cannot compare matrices with different dimension") return nil end #check shape (0...dim).each do |i| if (@shape[i] != other.shape[i]) raise(ShapeError, "cannot compare matrices with different shapes"); return nil end end #check the entries (0...lha.length).each do |i| resultArray[i] = lha[i] > rha[i] ? 
true : false end result = NMatrix.new(:copy) result.shape = @shape result.dtype = :object result.s = resultArray end result end # ///////////////////////// # // Matrix Math Methods // # ///////////////////////// def dot(other) result = nil if (other.is_a?(NMatrix)) #check dimension if (@shape.length!=2 || other.shape.length!=2) raise(NotImplementedError, "please convert array to nx1 or 1xn NMatrix first") return nil end #check shape if (@shape[1] != other.shape[0]) raise(ArgumentError, "incompatible dimensions") return nil end # if(@stype != other.stype) # raise(NotImplementedError, "matrices must have same stype") # end result = create_dummy_nmatrix result.shape = [@shape[0],other.shape[1]] twoDMat = self.twoDMat.multiply(other.twoDMat) result.s = ArrayRealVector.new(ArrayGenerator.getArrayDouble(twoDMat.getData, @shape[0],other.shape[1])) else raise(ArgumentError, "cannot have dot product with a scalar"); end return result; end def symmetric? return is_symmetric(false) end def is_symmetric(hermitian) is_symmetric = true if (@shape[0] == @shape[1] and @dim == 2) if @stype == :dense if (hermitian) #Currently, we are not dealing with complex matrices. eps = 0 is_symmetric = MatrixUtils.isSymmetric(self.twoDMat, eps) else eps = 0 is_symmetric = MatrixUtils.isSymmetric(self.twoDMat, eps) end else #TODO: Implement, at the very least, yale_is_symmetric. Model it after yale/transp.template.c. # raise Exception.new("symmetric? and hermitian? only implemented for dense currently") end end return is_symmetric ? true : false end def hermitian? 
return is_symmetric(true) end def capacity end # // protected methods protected def __inverse__(matrix, bool =true) # if (:stype != :dense) # raise Exception.new("needs exact determinant implementation for this matrix stype") # return nil # end if (@dim != 2 || @shape[0] != @shape[1]) raise Exception.new("matrices must be square to have an inverse defined") return nil end to_return = nil if (dtype == :RUBYOBJ) # to_return = *reinterpret_cast(result); else to_return = create_dummy_nmatrix twoDMat = MatrixUtils.inverse(matrix.twoDMat) to_return.s = ArrayRealVector.new(ArrayGenerator.getArrayDouble(twoDMat.getData, @shape[0], @shape[1])) end return to_return end def __inverse__! # if (:stype != :dense) # raise Exception.new("needs exact determinant implementation for this matrix stype") # return nil # end if (@dim != 2 || @shape[0] != @shape[1]) raise Exception.new("matrices must be square to have an inverse defined") return nil end to_return = nil if (dtype == :RUBYOBJ) # to_return = *reinterpret_cast(result); else twoDMat = MatrixUtils.inverse(self.twoDMat) @s = ArrayRealVector.new(ArrayGenerator.getArrayDouble(twoDMat.getData, @shape[0], @shape[1])) end return self end def __inverse_exact__ # if (:stype != :dense) # raise Exception.new("needs exact determinant implementation for this matrix stype") # return nil # end if (@dim != 2 || @shape[0] != @shape[1]) raise Exception.new("matrices must be square to have an inverse defined") return nil end to_return = nil if (dtype == :RUBYOBJ) # to_return = *reinterpret_cast(result); else to_return = create_dummy_nmatrix twoDMat = MatrixUtils.inverse(self.twoDMat) to_return.s = ArrayRealVector.new(ArrayGenerator.getArrayDouble(twoDMat.getData, @shape[0], @shape[1])) end return to_return end private # // private methods def __hessenberg__(param) raise(NotImplementedError, "Hessenberg Transformer not implemented for NMatrix-JRuby") end end # load jruby implementation of operators. 
require_relative './slice.rb'
require_relative './operators.rb'
require_relative './decomposition.rb'
require_relative './error.rb'
require_relative './enumerable.rb'

================================================
FILE: lib/nmatrix/jruby/operators.rb
================================================

class NMatrix
  # A dummy matrix is a matrix without the elements attribute.
  # NMatrix#create_dummy_matrix prevents creating copies as @s is set explicitly.

  # Element-wise addition. With an NMatrix argument the dimension and shape
  # must match exactly; with a scalar the value is added to every entry.
  def +(other)
    result = create_dummy_nmatrix
    if (other.is_a?(NMatrix))
      #check dimension
      raise(ShapeError, "Cannot add matrices with different dimension") if (@dim != other.dim)
      #check shape
      (0...dim).each do |i|
        raise(ShapeError, "Cannot add matrices with different shapes") if (@shape[i] != other.shape[i])
      end
      result.s = @s.copy.add(other.s)
    else
      result.s = @s.copy.mapAddToSelf(other)
    end
    result
  end

  # Element-wise subtraction (matrix - matrix or matrix - scalar).
  def -(other)
    result = create_dummy_nmatrix
    if (other.is_a?(NMatrix))
      #check dimension
      raise(ShapeError, "Cannot subtract matrices with different dimension") if (@dim != other.dim)
      #check shape
      (0...dim).each do |i|
        raise(ShapeError, "Cannot subtract matrices with different shapes") if (@shape[i] != other.shape[i])
      end
      result.s = @s.copy.subtract(other.s)
    else
      result.s = @s.copy.mapSubtractToSelf(other)
    end
    result
  end

  # Element-wise (Hadamard) product — NOT the matrix product; see #dot.
  def *(other)
    result = create_dummy_nmatrix
    if (other.is_a?(NMatrix))
      #check dimension
      raise(ShapeError, "Cannot multiply matrices with different dimension") if (@dim != other.dim)
      #check shape
      (0...dim).each do |i|
        raise(ShapeError, "Cannot multiply matrices with different shapes") if (@shape[i] != other.shape[i])
      end
      result.s = @s.copy.ebeMultiply(other.s)
    else
      result.s = @s.copy.mapMultiplyToSelf(other)
    end
    result
  end

  # Element-wise division.
  def /(other)
    result = create_dummy_nmatrix
    if (other.is_a?(NMatrix))
      #check dimension
      raise(ShapeError, "Cannot divide matrices with different dimension") if (@dim != other.dim)
      #check shape
      (0...dim).each do |i|
        raise(ShapeError, "Cannot divide matrices with different shapes") if (@shape[i] != other.shape[i])
      end
      result.s = @s.copy.ebeDivide(other.s)
    else
      result.s = @s.copy.mapDivideToSelf(other)
    end
    result
  end

  # Raises every entry to the power +val+.
  # NOTE(review): builds the result via NMatrix.new(:copy) + explicit
  # shape/dim instead of create_dummy_nmatrix like the other operators —
  # presumably equivalent; confirm before unifying.
  def ** val
    result = NMatrix.new(:copy)
    result.shape = @shape
    result.dim = @dim
    result.s = @s.copy.mapToSelf(Power.new(val))
    result
  end

  # Modulus is not supported by the JRuby backend.
  def %(other)
    raise Exception.new("modulus not supported in NMatrix-jruby")
  end

  # Element-wise atan2. When +scalar+ is true, +other+ is a scalar used as
  # the first argument for every entry; otherwise +other+ may be an NMatrix
  # or a scalar second argument.
  def atan2(other, scalar=false)
    result = create_dummy_nmatrix
    if scalar
      result.s = ArrayRealVector.new MathHelper.atan2Scalar(other, @s.toArray)
    else
      if other.is_a? NMatrix
        result.s = ArrayRealVector.new MathHelper.atan2(other.s.toArray, @s.toArray)
      else
        result.s = ArrayRealVector.new MathHelper.atan2Scalar2(other, @s.toArray)
      end
    end
    result
  end

  # Element-wise ldexp (x * 2**exp); same argument conventions as #atan2.
  def ldexp(other, scalar=false)
    result = create_dummy_nmatrix
    if scalar
      result.s = ArrayRealVector.new MathHelper.ldexpScalar(other, @s.toArray)
    else
      if other.is_a? NMatrix
        result.s = ArrayRealVector.new MathHelper.ldexp(other.s.toArray, @s.toArray)
      else
        result.s = ArrayRealVector.new MathHelper.ldexpScalar2(other, @s.toArray)
      end
    end
    result
  end

  # Element-wise hypot (sqrt(x**2 + y**2)).
  # NOTE(review): the non-NMatrix fallback calls hypotScalar — the same
  # helper as the scalar=true branch — whereas atan2/ldexp use a distinct
  # *Scalar2 helper there. Confirm whether MathHelper.hypotScalar2 was
  # intended (for hypot the argument order is symmetric, so this may be
  # deliberate).
  def hypot(other, scalar=false)
    result = create_dummy_nmatrix
    if scalar
      result.s = ArrayRealVector.new MathHelper.hypotScalar(other, @s.toArray)
    else
      if other.is_a? NMatrix
        result.s = ArrayRealVector.new MathHelper.hypot(other.s.toArray, @s.toArray)
      else
        result.s = ArrayRealVector.new MathHelper.hypotScalar(other, @s.toArray)
      end
    end
    result
  end

  # Element-wise trigonometric / hyperbolic / exponential functions, each
  # applying a commons-math univariate function over a copy of the storage.
  def sin
    result = create_dummy_nmatrix
    result.s = @s.copy.mapToSelf(Sin.new())
    result
  end

  def cos
    result = create_dummy_nmatrix
    result.s = @s.copy.mapToSelf(Cos.new())
    result
  end

  def tan
    result = create_dummy_nmatrix
    result.s = @s.copy.mapToSelf(Tan.new())
    result
  end

  def asin
    result = create_dummy_nmatrix
    result.s = @s.copy.mapToSelf(Asin.new())
    result
  end

  def acos
    result = create_dummy_nmatrix
    result.s = @s.copy.mapToSelf(Acos.new())
    result
  end

  def atan
    result = create_dummy_nmatrix
    result.s = @s.copy.mapToSelf(Atan.new())
    result
  end

  def sinh
    result = create_dummy_nmatrix
    result.s = @s.copy.mapToSelf(Sinh.new())
    result
  end

  def cosh
    result = create_dummy_nmatrix
    result.s = @s.copy.mapToSelf(Cosh.new())
    result
  end

  # NOTE(review): unlike its siblings this builds the result via
  # NMatrix.new(:copy) and sets shape/dim by hand — presumably equivalent
  # to create_dummy_nmatrix; confirm before unifying.
  def tanh
    result = NMatrix.new(:copy)
    result.shape = @shape
    result.dim = @dim
    result.s = @s.copy.mapToSelf(Tanh.new())
    result
  end

  def asinh
    result = create_dummy_nmatrix
    result.s = @s.copy.mapToSelf(Asinh.new())
    result
  end

  def acosh
    result = create_dummy_nmatrix
    result.s = @s.copy.mapToSelf(Acosh.new())
    result
  end

  def atanh
    result = create_dummy_nmatrix
    result.s = @s.copy.mapToSelf(Atanh.new())
    result
  end

  def exp
    result = create_dummy_nmatrix
    result.s = @s.copy.mapToSelf(Exp.new())
    result
  end

  # Natural log by default; pass a numeric base for log_base (computed on
  # the Java side by MathHelper).
  def log(val = :natural)
    result = create_dummy_nmatrix
    if val == :natural
      result.s = @s.copy.mapToSelf(Log.new())
    else
      result.s = ArrayRealVector.new MathHelper.log(val, @s.toArray)
    end
    result
  end

  def log2
    self.log(2)
  end

  def log10
    result = create_dummy_nmatrix
    result.s = @s.copy.mapToSelf(Log10.new())
    result
  end

  def sqrt
    result = create_dummy_nmatrix
    result.s = @s.copy.mapToSelf(Sqrt.new())
    result
  end

  # Error function and friends, computed element-wise on the Java side.
  def erf
    result = create_dummy_nmatrix
    result.s = ArrayRealVector.new MathHelper.erf(@s.toArray)
    result
  end

  def erfc
    result = create_dummy_nmatrix
    result.s = ArrayRealVector.new MathHelper.erfc(@s.toArray)
    result
  end

  def cbrt
    result = create_dummy_nmatrix
    result.s = @s.copy.mapToSelf(Cbrt.new())
    result
  end

  def gamma
    result = create_dummy_nmatrix
    result.s = ArrayRealVector.new MathHelper.gamma(@s.toArray)
    result
  end

  # Unary minus: negates every entry.
  def -@
    result = create_dummy_nmatrix
    result.s = @s.copy.mapMultiplyToSelf(-1)
    result
  end

  def floor
    result = create_dummy_nmatrix
    # Need to be changed later
    result.dtype = :int64
    result.s = @s.copy.mapToSelf(Floor.new())
    result
  end

  def ceil
    result = create_dummy_nmatrix
    # Need to be changed later
    result.dtype = :int64
    result.s = @s.copy.mapToSelf(Ceil.new())
    result
  end

  def round
    result = create_dummy_nmatrix
    # Need to be changed later
    result.dtype = :int64
    result.s = ArrayRealVector.new MathHelper.round(@s.toArray)
    result
  end

end

================================================
FILE: lib/nmatrix/jruby/slice.rb
================================================

class NMatrix
  # Parse the slice arguments +args+ for a matrix of dimension +dim+ and
  # shape +shape_array+ into a hash with :coords (start index per
  # dimension), :lengths (extent per dimension) and :single (true when
  # every component was a plain index, i.e. the slice selects one element).
  def get_slice(dim, args, shape_array)
    slice = {}
    slice[:coords]=[]
    slice[:lengths]=[]
    slice[:single] = true

    argc = args.length

    t = 0
    (0...dim).each do |r|
      v = t == argc ? nil : args[t]

      if(argc - t + r < dim && shape_array[r] ==1)
        # Length-1 dimension that was omitted from args: select it whole.
        slice[:coords][r] = 0
        slice[:lengths][r] = 1
      elsif v.is_a?(Fixnum)
        v_ = v.to_i.to_int
        if (v_ < 0) # checking for negative indexes
          slice[:coords][r]  = shape_array[r]+v_
        else
          slice[:coords][r]  = v_
        end
        slice[:lengths][r] = 1
        t+=1
      elsif (v.is_a?(Symbol) && v == :*)
        # :* selects the entire dimension.
        slice[:coords][r] = 0
        slice[:lengths][r] = shape_array[r]
        slice[:single] = false
        t+=1
      elsif v.is_a?(Range)
        begin_ = v.begin
        end_ = v.end
        excl = v.exclude_end?
        # NOTE(review): uses self.shape here but shape_array everywhere
        # else — same values in practice; confirm.
        slice[:coords][r] = (begin_ < 0) ? shape[r] + begin_ : begin_

        # Exclude last element for a...b range
        if (end_ < 0)
          slice[:lengths][r] = shape_array[r] + end_ - slice[:coords][r] + (excl ?
0 : 1)
        end
        slice[:single] = false
        t+=1
      else
        raise(ArgumentError, "expected Fixnum or Range for slice component instead of #{v.class}")
      end

      # Bounds check: the requested window must fit inside this dimension.
      if (slice[:coords][r] > shape_array[r] || slice[:coords][r] + slice[:lengths][r] > shape_array[r])
        raise(RangeError, "slice is larger than matrix in dimension #{r} (slice component #{t})")
      end
    end

    return slice
  end

  # Row-major strides for +nmatrix+: stride[i] is the linear distance
  # between consecutive indices along dimension i.
  def get_stride(nmatrix)
    stride = Array.new()
    (0...nmatrix.dim).each do |i|
      stride[i] = 1;
      (i+1...dim).each do |j|
        stride[i] *= nmatrix.shape[j]
      end
    end
    stride
  end

  # Copying slice lookup: returns a single entry for point slices, or (via
  # dense_storage_get) a new NMatrix holding a copy of the selected window.
  def xslice(args)
    result = nil

    if self.dim < args.length
      raise(ArgumentError,"wrong number of arguments (#{args} for #{effective_dim(self)})")
    else
      result = Array.new()

      slice = get_slice(@dim, args, @shape)
      stride = get_stride(self)
      if slice[:single]
        if (@dtype == :object)
          result = @s[dense_storage_get(slice,stride)]
        else
          # NOTE(review): `s` is an unused local — candidate for removal.
          s = @s.toArray().to_a
          result = @s.getEntry(dense_storage_get(slice,stride))
        end
      else
        result = dense_storage_get(slice,stride)
      end
    end
    return result
  end

  #its by ref
  # Reference slice lookup; same shape-checking as #xslice but delegates the
  # multi-element case to dense_storage_ref (defined elsewhere — TODO
  # confirm it returns a view rather than a copy).
  def xslice_ref(args)
    result = nil

    if self.dim < args.length
      raise(ArgumentError,"wrong number of arguments (#{args} for #{effective_dim(self)})")
    else
      result = Array.new()

      slice = get_slice(@dim, args, @shape)
      stride = get_stride(self)
      if slice[:single]
        if (@dtype == :object)
          result = @s[dense_storage_get(slice,stride)]
        else
          result = @s.getEntry(dense_storage_get(slice,stride))
        end
      else
        result = dense_storage_ref(slice,stride)
      end
    end
    return result
  end

  # For a :single slice returns the linear position of the element;
  # otherwise builds and returns a new NMatrix copied from the window.
  def dense_storage_get(slice,stride)
    if slice[:single]
      return dense_storage_pos(slice[:coords],stride)
    else
      shape = @shape.dup
      (0...@dim).each do |i|
        shape[i] = slice[:lengths][i]
      end
      psrc = dense_storage_pos(slice[:coords], stride)
      src = {}
      result = NMatrix.new(:copy)
      result.dim = dim
      result.dtype = @dtype
      resultShape= Array.new(dim)
      (0...dim).each do |i|
        resultShape[i] = slice[:lengths][i]
      end
      result.shape = resultShape
      dest = {}
      src[:stride] = get_stride(self)
      if (@dtype == :object)
        src[:elements] = @s
      else
        src[:elements] = @s.toArray().to_a
      end
      dest[:stride] = get_stride(result)
      dest[:shape] = resultShape
      dest[:elements] = []
      temp = []
      s = (slice_copy(src, dest, slice[:lengths], 0, psrc,0))
      # if
      # arr = Java::double[s.length].new
      # Object matrices are backed by a boolean[] here; numeric ones by a
      # double[] wrapped in an ArrayRealVector.
      if (@dtype == :object)
        arr = Java::boolean[s.length].new
      else
        arr = Java::double[s.length].new
      end
      (0...s.length).each do |i|
        arr[i] = s[i]
      end
      if (@dtype == :object)
        result.s = arr
      else
        result.s = ArrayRealVector.new(arr)
      end
      return result
    end
  end

  # Recursively copy the window described by +lengths+ from src to dest,
  # descending one dimension per recursion level; returns dest[:elements].
  def slice_copy(src, dest,lengths, pdest, psrc,n)
    if @dim-n>1
      (0...lengths[n]).each do |i|
        slice_copy(src, dest, lengths,pdest+dest[:stride][n]*i,psrc+src[:stride][n]*i,n+1)
      end
    else
      (0...dest[:shape][n]).each do |p|
        dest[:elements][p+pdest] = src[:elements][p+psrc]
      end
    end
    dest[:elements]
  end

  # Inverse of dense_storage_pos: writes the per-dimension coordinates of
  # linear index +slice_pos+ into +coords_out+.
  # NOTE(review): callers pass offset=0 (an Integer); `offset[i]` is then
  # Integer#[] bit access, which always yields 0 here — confirm intended.
  def dense_storage_coords(s, slice_pos, coords_out, stride, offset) #array, int, array
    temp_pos = slice_pos;
    (0...dim).each do |i|
      coords_out[i] = (temp_pos - temp_pos % stride[i])/stride[i] - offset[i];
      temp_pos = temp_pos % stride[i]
    end
    return temp_pos
  end

  # Linear (row-major) position of +coords+ under +stride+.
  def dense_storage_pos(coords,stride)
    pos = 0;
    offset = 0
    (0...@dim).each do |i|
      pos += coords[i] * stride[i] ;
    end
    return pos + offset;
  end

  # Recursively write the values +v+ into the window of self described by
  # +lengths+ starting at linear position +pdest+; +v_offset+ wraps modulo
  # +v_size+ so a short value array is cycled.
  def slice_set(dest, lengths, pdest, rank, v, v_size, v_offset)
    if (dim - rank > 1)
      (0...lengths[rank]).each do |i|
        slice_set(dest, lengths, pdest + dest[:stride][rank] * i, rank + 1, v, v_size, v_offset);
      end
    else
      (0...lengths[rank]).each do |p|
        v_offset %= v_size if(v_offset >= v_size)
        # elem = dest[:elements]
        # elem[p + pdest] = v[v_offset]
        if @dtype == :object
          @s[p + pdest] = v[v_offset]
        else
          @s.setEntry(p + pdest, v[v_offset])
        end
        v_offset += 1
      end
    end
  end

  # Assign +right+ (scalar, Array or NMatrix) into the window of self
  # described by +slice+.
  def dense_storage_set(slice, right)
    stride = get_stride(self)
    v_size = 1

    if right.is_a?(NMatrix)
      right = right.s.toArray.to_a
    end

    if(right.is_a?(Array))
      v_size = right.length
      v = right
      if (dtype == :RUBYOBJ)
        # nm_register_values(reinterpret_cast(v), v_size)
      end
      # NOTE(review): v and right alias the same Array, so this copy is a
      # no-op — kept from the C-port; candidate for removal.
      (0...v_size).each do |m|
        v[m] = right[m]
      end
    else
      v = [right]
      if (@dtype == :RUBYOBJ)
        # nm_register_values(reinterpret_cast(v), v_size)
      end
    end
    if(slice[:single])
      # reinterpret_cast(s->elements)[nm_dense_storage_pos(s, slice->coords)] = v;
      pos = dense_storage_pos(slice[:coords],stride)
      if @dtype == :object
        @s[pos] = v[0]
      else
        @s.setEntry(pos, v[0])
      end
    else
      v_offset = 0
      dest = {}
      dest[:stride] = get_stride(self)
      dest[:shape] = shape
      # dest[:elements] = @s.toArray().to_a
      dense_pos = dense_storage_pos(slice[:coords],stride)
      slice_set(dest, slice[:lengths], dense_pos, 0, v, v_size, v_offset)
    end
  end
end

================================================
FILE: lib/nmatrix/lapack_core.rb
================================================

#--
# = NMatrix
#
# A linear algebra library for scientific computation in Ruby.
# NMatrix is part of SciRuby.
#
# NMatrix was originally inspired by and derived from NArray, by
# Masahiro Tanaka: http://narray.rubyforge.org
#
# == Copyright Information
#
# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
#
# Please see LICENSE.txt for additional copyright notices.
#
# == Contributing
#
# By contributing source code to SciRuby, you agree to be bound by
# our Contributor Agreement:
#
# * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
#
# == lapack_core.rb
#
# This file contains friendlier interfaces to LAPACK functions
# implemented in C.
# This file is only for functions available with the core nmatrix gem
# (no external libraries needed).
#
# Note: most of these functions are borrowed from ATLAS, which is available under a BSD-
# style license.
#++

class NMatrix
  module LAPACK
    #Add functions from C extension to main LAPACK module
    class << self
      NMatrix::Internal::LAPACK.singleton_methods.each do |m|
        define_method m, NMatrix::Internal::LAPACK.method(m).to_proc
      end
    end

    class << self
      # Solve the matrix equation AX = B, where A is a symmetric (or Hermitian)
      # positive-definite matrix. If A is a nxn matrix, B must be mxn.
# Depending on the value of uplo, only the upper or lower half of +a+ # is read. # This uses the Cholesky decomposition so it should be faster than # the generic NMatrix#solve method. # Doesn't modify inputs. # Requires either the nmatrix-atlas or nmatrix-lapacke gem. # * *Arguments* : # - +uplo+ -> Either +:upper+ or +:lower+. Specifies which half of +a+ to read. # - +a+ -> The matrix A. # - +b+ -> The right-hand side B. # * *Returns* : # - The solution X def posv(uplo, a, b) raise(NotImplementedError, "Either the nmatrix-atlas or nmatrix-lapacke gem must be installed to use posv") end # laswp(matrix, ipiv) -> NMatrix # # Permute the columns of a matrix (in-place) according to the Array +ipiv+. # def laswp(matrix, ipiv) raise(ArgumentError, "expected NMatrix for argument 0") unless matrix.is_a?(NMatrix) raise(StorageTypeError, "LAPACK functions only work on :dense NMatrix instances") unless matrix.stype == :dense raise(ArgumentError, "expected Array ipiv to have no more entries than NMatrix a has columns") if ipiv.size > matrix.shape[1] clapack_laswp(matrix.shape[0], matrix, matrix.shape[1], 0, ipiv.size-1, ipiv, 1) end def alloc_svd_result(matrix) [ NMatrix.new(matrix.shape[0], dtype: matrix.dtype), NMatrix.new([[matrix.shape[0],matrix.shape[1]].min,1], dtype: matrix.abs_dtype), NMatrix.new(matrix.shape[1], dtype: matrix.dtype) ] end # # call-seq: # gesvd(matrix) -> [u, sigma, v_transpose] # gesvd(matrix) -> [u, sigma, v_conjugate_transpose] # complex # # Compute the singular value decomposition of a matrix using LAPACK's GESVD function. # # Optionally accepts a +workspace_size+ parameter, which will be honored only if it is larger than what LAPACK # requires. # # Requires either the nmatrix-lapacke or nmatrix-atlas gem. 
# def gesvd(matrix, workspace_size=1) raise(NotImplementedError,"gesvd requires either the nmatrix-atlas or nmatrix-lapacke gem") end # # call-seq: # gesdd(matrix) -> [u, sigma, v_transpose] # gesdd(matrix) -> [u, sigma, v_conjugate_transpose] # complex # # Compute the singular value decomposition of a matrix using LAPACK's GESDD function. This uses a divide-and-conquer # strategy. See also #gesvd. # # Optionally accepts a +workspace_size+ parameter, which will be honored only if it is larger than what LAPACK # requires. # # Requires either the nmatrix-lapacke or nmatrix-atlas gem. # def gesdd(matrix, workspace_size=nil) raise(NotImplementedError,"gesvd requires either the nmatrix-atlas or nmatrix-lapacke gem") end # # call-seq: # geev(matrix) -> [eigenvalues, left_eigenvectors, right_eigenvectors] # geev(matrix, :left) -> [eigenvalues, left_eigenvectors] # geev(matrix, :right) -> [eigenvalues, right_eigenvectors] # # Perform eigenvalue decomposition on a matrix using LAPACK's xGEEV function. # # +eigenvalues+ is a n-by-1 NMatrix containing the eigenvalues. # # +right_eigenvalues+ is a n-by-n matrix such that its j'th column # contains the (right) eigenvalue of +matrix+ corresponding # to the j'th eigenvalue. # This means that +matrix+ = RDR^(-1), # where R is +right_eigenvalues+ and D is the diagonal matrix formed # from +eigenvalues+. # # +left_eigenvalues+ is n-by-n and its columns are the left # eigenvalues of +matrix+, using the {definition of left eigenvalue # from LAPACK}[https://software.intel.com/en-us/node/521147]. # # For real dtypes, +eigenvalues+ and the eigenvector matrices # will be complex if and only if +matrix+ has complex eigenvalues. # # Only available if nmatrix-lapack or nmatrix-atlas is installed. 
# def geev(matrix, which=:both) raise(NotImplementedError, "geev requires either the nmatrix-atlas or nmatrix-lapack gem") end # The following are functions that used to be implemented in C, but # now require nmatrix-atlas to run properly, so we can just # implemented their stubs in Ruby. def lapack_gesvd(jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, lwork) raise(NotImplementedError,"lapack_gesvd requires the nmatrix-atlas gem") end def lapack_gesdd(jobz, m, n, a, lda, s, u, ldu, vt, ldvt, lwork) raise(NotImplementedError,"lapack_gesdd requires the nmatrix-atlas gem") end def lapack_geev(jobvl, jobvr, n, a, lda, w, wi, vl, ldvl, vr, ldvr, lwork) raise(NotImplementedError,"lapack_geev requires the nmatrix-atlas gem") end def clapack_potrf(order, uplo, n, a, lda) raise(NotImplementedError,"clapack_potrf requires the nmatrix-atlas gem") end def clapack_potri(order, uplo, n, a, lda) raise(NotImplementedError,"clapack_potri requires the nmatrix-atlas gem") end def clapack_potrs(order, uplo, n, nrhs, a, lda, b, ldb) raise(NotImplementedError,"clapack_potrs requires the nmatrix-atlas gem") end def clapack_getri(order, n, a, lda, ipiv) raise(NotImplementedError,"clapack_getri requires the nmatrix-atlas gem") end end end end ================================================ FILE: lib/nmatrix/lapack_ext_common.rb ================================================ #-- # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. 
# # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == lapack_ext_common.rb # # Contains functions shared by nmatrix-atlas and nmatrix-lapacke gems. #++ class NMatrix def NMatrix.register_lapack_extension(name) if (defined? @@lapack_extension) raise "Attempting to load #{name} when #{@@lapack_extension} is already loaded. You can only load one LAPACK extension." end @@lapack_extension = name end alias_method :internal_dot, :dot def dot(right_v) if (right_v.is_a?(NMatrix) && self.stype == :dense && right_v.stype == :dense && self.dim == 2 && right_v.dim == 2 && self.shape[1] == right_v.shape[0]) result_dtype = NMatrix.upcast(self.dtype,right_v.dtype) left = self.dtype == result_dtype ? self : self.cast(dtype: result_dtype) right = right_v.dtype == result_dtype ? right_v : right_v.cast(dtype: result_dtype) left = left.clone if left.is_ref? right = right.clone if right.is_ref? result_m = left.shape[0] result_n = right.shape[1] left_n = left.shape[1] vector = result_n == 1 result = NMatrix.new([result_m,result_n], dtype: result_dtype) if vector NMatrix::BLAS.cblas_gemv(false, result_m, left_n, 1, left, left_n, right, 1, 0, result, 1) else NMatrix::BLAS.cblas_gemm(:row, false, false, result_m, result_n, left_n, 1, left, left_n, right, result_n, 0, result, result_n) end return result else #internal_dot will handle non-dense matrices (and also dot-products for NMatrix's with dim=1), #and also all error-handling if the input is not valid self.internal_dot(right_v) end end end ================================================ FILE: lib/nmatrix/lapack_plugin.rb ================================================ #-- # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. 
# # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == lapack_plugin.rb # # This file `require`s either nmatrix-atlas or nmatrix-lapacke depending on which # is available. # # The idea is that if a developer wants to use a LAPACK feature which is provided # by both of these gems (e.g. NMatrix#potrf! or NMatrix::LAPACK.geev), # but doesn't care which one is installed, they can # just `require 'nmatrix/lapack_plugin'` rather than having to choose between # `require 'nmatrix/lapacke'` or `require 'nmatrix/lapacke'` #++ begin require 'nmatrix/atlas' rescue LoadError begin require 'nmatrix/lapacke' rescue LoadError raise(LoadError,"Either nmatrix-atlas or nmatrix-lapacke must be installed") end end ================================================ FILE: lib/nmatrix/lapacke.rb ================================================ #-- # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. 
#
# == Contributing
#
# By contributing source code to SciRuby, you agree to be bound by
# our Contributor Agreement:
#
# * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
#
# == lapacke.rb
#
# ruby file for the nmatrix-lapacke gem. Loads the C extension and defines
# nice ruby interfaces for LAPACK functions.
#++

require 'nmatrix/nmatrix.rb' #need to have nmatrix required first or else bad things will happen
require_relative 'lapack_ext_common'

NMatrix.register_lapack_extension("nmatrix-lapacke")

require "nmatrix_lapacke.so"

class NMatrix
  #Add functions from the LAPACKE C extension to the main LAPACK and BLAS modules.
  #This will overwrite the original functions where applicable.
  module LAPACK
    class << self
      NMatrix::LAPACKE::LAPACK.singleton_methods.each do |m|
        define_method m, NMatrix::LAPACKE::LAPACK.method(m).to_proc
      end
    end
  end

  module BLAS
    class << self
      NMatrix::LAPACKE::BLAS.singleton_methods.each do |m|
        define_method m, NMatrix::LAPACKE::BLAS.method(m).to_proc
      end
    end
  end

  module LAPACK
    class << self
      # Solve AX = B for symmetric positive-definite A via Cholesky
      # (potrf + potrs). Inputs are cloned, not modified; returns X.
      def posv(uplo, a, b)
        raise(ShapeError, "a must be square") unless a.dim == 2 && a.shape[0] == a.shape[1]
        raise(ShapeError, "number of rows of b must equal number of cols of a") unless a.shape[1] == b.shape[0]
        raise(StorageTypeError, "only works with dense matrices") unless a.stype == :dense && b.stype == :dense
        raise(DataTypeError, "only works for non-integer, non-object dtypes") if a.integer_dtype? || a.object_dtype? || b.integer_dtype? || b.object_dtype?

        x = b.clone
        clone = a.clone
        n = a.shape[0]
        nrhs = b.shape[1]
        lapacke_potrf(:row, uplo, n, clone, n)
        lapacke_potrs(:row, uplo, n, nrhs, clone, n, x, b.shape[1])
        x
      end

      # Eigen-decomposition via LAPACKE xGEEV. Returns
      # [eigenvalues, left, right] (or a pair when +which+ is :left/:right).
      # For real dtypes with complex eigenvalues, the conjugate-pair
      # columns returned by LAPACK are recombined into complex vectors.
      def geev(matrix, which=:both)
        raise(StorageTypeError, "LAPACK functions only work on dense matrices") unless matrix.dense?
        raise(ShapeError, "eigenvalues can only be computed for square matrices") unless matrix.dim == 2 && matrix.shape[0] == matrix.shape[1]

        jobvl = (which == :both || which == :left) ? :t : false
        jobvr = (which == :both || which == :right) ? :t : false

        # Copy the matrix so it doesn't get overwritten.
        temporary_matrix = matrix.clone
        n = matrix.shape[0]

        # Outputs
        eigenvalues = NMatrix.new([n, 1], dtype: matrix.dtype) # For real dtypes this holds only the real part of the eigenvalues.
        imag_eigenvalues = matrix.complex_dtype? ? nil : NMatrix.new([n, 1], dtype: matrix.dtype) # For complex dtypes, this is unused.
        left_output = jobvl ? matrix.clone_structure : nil
        right_output = jobvr ? matrix.clone_structure : nil

        NMatrix::LAPACK::lapacke_geev(:row,
                                      jobvl, # compute left eigenvectors of A?
                                      jobvr, # compute right eigenvectors of A? (left eigenvectors of A**T)
                                      n, # order of the matrix
                                      temporary_matrix,# input matrix (used as work)
                                      n, # leading dimension of matrix
                                      eigenvalues,# real part of computed eigenvalues
                                      imag_eigenvalues,# imag part of computed eigenvalues
                                      left_output, # left eigenvectors, if applicable
                                      n, # leading dimension of left_output
                                      right_output, # right eigenvectors, if applicable
                                      n) # leading dimension of right_output

        # For real dtypes, transform left_output and right_output into correct forms.
        # If the j'th and the (j+1)'th eigenvalues form a complex conjugate
        # pair, then the j'th and (j+1)'th columns of the matrix are
        # the real and imag parts of the eigenvector corresponding
        # to the j'th eigenvalue.
        if !matrix.complex_dtype?
          complex_indices = []
          n.times do |i|
            complex_indices << i if imag_eigenvalues[i] != 0.0
          end

          if !complex_indices.empty?
            # For real dtypes, put the real and imaginary parts together
            eigenvalues = eigenvalues + imag_eigenvalues*Complex(0.0,1.0)
            left_output = left_output.cast(dtype: NMatrix.upcast(:complex64, matrix.dtype)) if left_output
            right_output = right_output.cast(dtype: NMatrix.upcast(:complex64, matrix.dtype)) if right_output
          end

          complex_indices.each_slice(2) do |i, _|
            if right_output
              right_output[0...n,i] = right_output[0...n,i] + right_output[0...n,i+1]*Complex(0.0,1.0)
              right_output[0...n,i+1] = right_output[0...n,i].complex_conjugate
            end

            if left_output
              left_output[0...n,i] = left_output[0...n,i] + left_output[0...n,i+1]*Complex(0.0,1.0)
              left_output[0...n,i+1] = left_output[0...n,i].complex_conjugate
            end
          end
        end

        if which == :both
          return [eigenvalues, left_output, right_output]
        elsif which == :left
          return [eigenvalues, left_output]
        else
          return [eigenvalues, right_output]
        end
      end

      # SVD via LAPACKE GESVD; returns [u, sigma, vt] from
      # alloc_svd_result. +superb+ receives convergence diagnostics.
      def gesvd(matrix, workspace_size=1)
        result = alloc_svd_result(matrix)

        m = matrix.shape[0]
        n = matrix.shape[1]

        superb = NMatrix.new([[m,n].min], dtype: matrix.abs_dtype)

        NMatrix::LAPACK::lapacke_gesvd(:row, :a, :a, m, n, matrix, n, result[1], result[0], m, result[2], n, superb)
        result
      end

      # SVD via the divide-and-conquer LAPACKE GESDD routine.
      def gesdd(matrix, workspace_size=nil)
        result = alloc_svd_result(matrix)

        m = matrix.shape[0]
        n = matrix.shape[1]

        NMatrix::LAPACK::lapacke_gesdd(:row, :a, m, n, matrix, n, result[1], result[0], m, result[2], n)
        result
      end
    end
  end

  # In-place LU factorization (LAPACKE GETRF); returns the pivot array.
  def getrf!
    raise(StorageTypeError, "LAPACK functions only work on dense matrices") unless self.dense?
    ipiv = NMatrix::LAPACK::lapacke_getrf(:row, self.shape[0], self.shape[1], self, self.shape[1])
    return ipiv
  end

  # In-place matrix inversion via LU factorization (GETRF) followed by
  # GETRI; continues on the next chunk line.
  def invert!
    raise(StorageTypeError, "invert only works on dense matrices currently") unless self.dense?
    raise(ShapeError, "Cannot invert non-square matrix") unless shape[0] == shape[1]
    raise(DataTypeError, "Cannot invert an integer matrix in-place") if self.integer_dtype?
# Get the pivot array; factor the matrix n = self.shape[0] pivot = NMatrix::LAPACK::lapacke_getrf(:row, n, n, self, n) # Now calculate the inverse using the pivot array NMatrix::LAPACK::lapacke_getri(:row, n, self, n, pivot) self end def potrf!(which) raise(StorageTypeError, "LAPACK functions only work on dense matrices") unless self.dense? raise(ShapeError, "Cholesky decomposition only valid for square matrices") unless self.dim == 2 && self.shape[0] == self.shape[1] NMatrix::LAPACK::lapacke_potrf(:row, which, self.shape[0], self, self.shape[1]) end def solve(b, opts = {}) raise(ShapeError, "Must be called on square matrix") unless self.dim == 2 && self.shape[0] == self.shape[1] raise(ShapeError, "number of rows of b must equal number of cols of self") if self.shape[1] != b.shape[0] raise(ArgumentError, "only works with dense matrices") if self.stype != :dense raise(ArgumentError, "only works for non-integer, non-object dtypes") if integer_dtype? or object_dtype? or b.integer_dtype? or b.object_dtype? opts = { form: :general }.merge(opts) x = b.clone n = self.shape[0] nrhs = b.shape[1] case opts[:form] when :general clone = self.clone ipiv = NMatrix::LAPACK.lapacke_getrf(:row, n, n, clone, n) NMatrix::LAPACK.lapacke_getrs(:row, :no_transpose, n, nrhs, clone, n, ipiv, x, nrhs) x when :upper_tri, :upper_triangular raise(ArgumentError, "upper triangular solver does not work with complex dtypes") if complex_dtype? or b.complex_dtype? NMatrix::BLAS::cblas_trsm(:row, :left, :upper, false, :nounit, n, nrhs, 1.0, self, n, x, nrhs) x when :lower_tri, :lower_triangular raise(ArgumentError, "lower triangular solver does not work with complex dtypes") if complex_dtype? or b.complex_dtype? 
NMatrix::BLAS::cblas_trsm(:row, :left, :lower, false, :nounit, n, nrhs, 1.0, self, n, x, nrhs) x when :pos_def, :positive_definite u, l = self.factorize_cholesky z = l.solve(b, form: :lower_tri) u.solve(z, form: :upper_tri) else raise(ArgumentError, "#{opts[:form]} is not a valid form option") end end # # call-seq: # geqrf! -> shape.min x 1 NMatrix # # QR factorization of a general M-by-N matrix +A+. # # The QR factorization is A = QR, where Q is orthogonal and R is Upper Triangular # +A+ is overwritten with the elements of R and Q with Q being represented by the # elements below A's diagonal and an array of scalar factors in the output NMatrix. # # The matrix Q is represented as a product of elementary reflectors # Q = H(1) H(2) . . . H(k), where k = min(m,n). # # Each H(i) has the form # # H(i) = I - tau * v * v' # # http://www.netlib.org/lapack/explore-html/d3/d69/dgeqrf_8f.html # # Only works for dense matrices. # # * *Returns* : # - Vector TAU. Q and R are stored in A. Q is represented by TAU and A # * *Raises* : # - +StorageTypeError+ -> LAPACK functions only work on dense matrices. # def geqrf! raise(StorageTypeError, "LAPACK functions only work on dense matrices") unless self.dense? tau = NMatrix.new([self.shape.min,1], dtype: self.dtype) NMatrix::LAPACK::lapacke_geqrf(:row, self.shape[0], self.shape[1], self, self.shape[1], tau) tau end # # call-seq: # ormqr(tau) -> NMatrix # ormqr(tau, side, transpose, c) -> NMatrix # # Returns the product Q * c or c * Q after a call to geqrf! used in QR factorization. # +c+ is overwritten with the elements of the result NMatrix if supplied. Q is the orthogonal matrix # represented by tau and the calling NMatrix # # Only works on float types, use unmqr for complex types. 
# # == Arguments # # * +tau+ - vector containing scalar factors of elementary reflectors # * +side+ - direction of multiplication [:left, :right] # * +transpose+ - apply Q with or without transpose [false, :transpose] # * +c+ - NMatrix multplication argument that is overwritten, no argument assumes c = identity # # * *Returns* : # # - Q * c or c * Q Where Q may be transposed before multiplication. # # # * *Raises* : # - +StorageTypeError+ -> LAPACK functions only work on dense matrices. # - +TypeError+ -> Works only on floating point matrices, use unmqr for complex types # - +TypeError+ -> c must have the same dtype as the calling NMatrix # def ormqr(tau, side=:left, transpose=false, c=nil) raise(StorageTypeError, "LAPACK functions only work on dense matrices") unless self.dense? raise(TypeError, "Works only on floating point matrices, use unmqr for complex types") if self.complex_dtype? raise(TypeError, "c must have the same dtype as the calling NMatrix") if c and c.dtype != self.dtype #Default behaviour produces Q * I = Q if c is not supplied. result = c ? c.clone : NMatrix.identity(self.shape[0], dtype: self.dtype) NMatrix::LAPACK::lapacke_ormqr(:row, side, transpose, result.shape[0], result.shape[1], tau.shape[0], self, self.shape[1], tau, result, result.shape[1]) result end # # call-seq: # unmqr(tau) -> NMatrix # unmqr(tau, side, transpose, c) -> NMatrix # # Returns the product Q * c or c * Q after a call to geqrf! used in QR factorization. # +c+ is overwritten with the elements of the result NMatrix if it is supplied. Q is the orthogonal matrix # represented by tau and the calling NMatrix # # Only works on complex types, use ormqr for float types. 
# # == Arguments # # * +tau+ - vector containing scalar factors of elementary reflectors # * +side+ - direction of multiplication [:left, :right] # * +transpose+ - apply Q as Q or its complex conjugate [false, :complex_conjugate] # * +c+ - NMatrix multplication argument that is overwritten, no argument assumes c = identity # # * *Returns* : # # - Q * c or c * Q Where Q may be transformed to its complex conjugate before multiplication. # # # * *Raises* : # - +StorageTypeError+ -> LAPACK functions only work on dense matrices. # - +TypeError+ -> Works only on floating point matrices, use unmqr for complex types # - +TypeError+ -> c must have the same dtype as the calling NMatrix # def unmqr(tau, side=:left, transpose=false, c=nil) raise(StorageTypeError, "ATLAS functions only work on dense matrices") unless self.dense? raise(TypeError, "Works only on complex matrices, use ormqr for normal floating point matrices") unless self.complex_dtype? raise(TypeError, "c must have the same dtype as the calling NMatrix") if c and c.dtype != self.dtype #Default behaviour produces Q * I = Q if c is not supplied. result = c ? c.clone : NMatrix.identity(self.shape[0], dtype: self.dtype) NMatrix::LAPACK::lapacke_unmqr(:row, side, transpose, result.shape[0], result.shape[1], tau.shape[0], self, self.shape[1], tau, result, result.shape[1]) result end end ================================================ FILE: lib/nmatrix/math.rb ================================================ #-- # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. 
#
# == Contributing
#
# By contributing source code to SciRuby, you agree to be bound by
# our Contributor Agreement:
#
# * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
#
# == math.rb
#
# Math functionality for NMatrix, along with any NMatrix instance
# methods that correspond to ATLAS/BLAS/LAPACK functions (e.g.,
# laswp).
#++

class NMatrix

  # Lists of Math-module functions that NMatrix exposes element-wise,
  # grouped by arity.
  module NMMath #:nodoc:
    METHODS_ARITY_2 = [:atan2, :ldexp, :hypot]
    METHODS_ARITY_1 = [:cos, :sin, :tan, :acos, :asin, :atan, :cosh, :sinh, :tanh, :acosh, :asinh, :atanh, :exp, :log2, :log10, :sqrt, :cbrt, :erf, :erfc, :gamma, :-@]
  end

  # Methods for generating permutation matrix from LU factorization results.
  module FactorizeLUMethods
    class << self
      # Build a size x size permutation matrix (dtype :byte) from a
      # LAPACK-style pivot array.
      def permutation_matrix_from(pivot_array)
        perm_arry = permutation_array_for(pivot_array)
        n = NMatrix.zeros(perm_arry.size, dtype: :byte)
        perm_arry.each_with_index { |e, i| n[e,i] = 1 }
        n
      end

      # Expand a LAPACK pivot array (a sequence of row swaps) into a plain
      # permutation of 0...size.
      def permutation_array_for(pivot_array)
        perm_arry = Array.new(pivot_array.size) { |i| i }
        perm_arry.each_index do |i|
          #the pivot indices returned by LAPACK getrf are indexed starting
          #from 1, so we need to subtract 1 here
          perm_arry[i], perm_arry[pivot_array[i]-1] = perm_arry[pivot_array[i]-1], perm_arry[i]
        end
        perm_arry
      end
    end
  end

  #
  # call-seq:
  #     invert! -> NMatrix
  #
  # Use LAPACK to calculate the inverse of the matrix (in-place) if available.
  # Only works on dense matrices. Alternatively uses in-place Gauss-Jordan
  # elimination.
  #
  # * *Raises* :
  #   - +StorageTypeError+ -> only implemented on dense matrices.
  #   - +ShapeError+ -> matrix must be square.
  #   - +DataTypeError+ -> cannot invert an integer matrix in-place.
  #
  def invert!
    raise(StorageTypeError, "invert only works on dense matrices currently") unless self.dense?
    raise(ShapeError, "Cannot invert non-square matrix") unless self.dim == 2 && self.shape[0] == self.shape[1]
    raise(DataTypeError, "Cannot invert an integer matrix in-place") if self.integer_dtype?

    #No internal implementation of getri, so use this other function
    __inverse__(self, true)
  end

  #
  # call-seq:
  #     invert -> NMatrix
  #
  # Make a copy of the matrix, then invert using Gauss-Jordan elimination.
  # Works without LAPACK.
  #
  # * *Returns* :
  #   - A dense NMatrix. Will be the same type as the input NMatrix,
  #   except if the input is an integral dtype, in which case it will be a
  #   :float64 NMatrix.
  #
  # * *Raises* :
  #   - +StorageTypeError+ -> only implemented on dense matrices.
  #   - +ShapeError+ -> matrix must be square.
  #
  def invert
    #write this in terms of invert! so plugins will only have to overwrite
    #invert! and not invert
    if self.integer_dtype?
      cloned = self.cast(dtype: :float64)
      cloned.invert!
    else
      cloned = self.clone
      cloned.invert!
    end
  end
  alias :inverse :invert

  # call-seq:
  #     exact_inverse! -> NMatrix
  #
  # Calulates inverse_exact of a matrix of size 2 or 3.
  # Only works on dense matrices.
  #
  # * *Raises* :
  #   - +DataTypeError+ -> cannot invert an integer matrix in-place.
  #   - +NotImplementedError+ -> cannot find exact inverse of matrix with size greater than 3
  #
  def exact_inverse!
    raise(ShapeError, "Cannot invert non-square matrix") unless self.dim == 2 && self.shape[0] == self.shape[1]
    raise(DataTypeError, "Cannot invert an integer matrix in-place") if self.integer_dtype?
    #No internal implementation of getri, so use this other function
    n = self.shape[0]
    if n>3
      raise(NotImplementedError, "Cannot find exact inverse of matrix of size greater than 3")
    else
      clond=self.clone
      __inverse_exact__(clond, n, n)
    end
  end

  #
  # call-seq:
  #     exact_inverse -> NMatrix
  #
  # Make a copy of the matrix, then invert using exact_inverse
  #
  # * *Returns* :
  #   - A dense NMatrix. Will be the same type as the input NMatrix,
  #   except if the input is an integral dtype, in which case it will be a
  #   :float64 NMatrix.
  #
  # * *Raises* :
  #   - +StorageTypeError+ -> only implemented on dense matrices.
  #   - +ShapeError+ -> matrix must be square.
  #   - +NotImplementedError+ -> cannot find exact inverse of matrix with size greater than 3
  #
  def exact_inverse
    #write this in terms of exact_inverse! so plugins will only have to overwrite
    #exact_inverse! and not exact_inverse
    if self.integer_dtype?
      cloned = self.cast(dtype: :float64)
      cloned.exact_inverse!
    else
      cloned = self.clone
      cloned.exact_inverse!
    end
  end
  alias :invert_exactly :exact_inverse

  #
  # call-seq:
  #     pinv -> NMatrix
  #
  # Compute the Moore-Penrose pseudo-inverse of a matrix using its
  # singular value decomposition (SVD).
  #
  # This function requires the nmatrix-atlas gem installed.
  #
  # * *Arguments* :
  #   - +tolerance(optional)+ -> Cutoff for small singular values.
  #
  # * *Returns* :
  #   - Pseudo-inverse matrix.
  #
  # * *Raises* :
  #   - +NotImplementedError+ -> If called without nmatrix-atlas or nmatrix-lapacke gem.
  #   - +TypeError+ -> If called without float or complex data type.
  #
  # * *Examples* :
  #
  #   a = NMatrix.new([2,2],[1,2,
  #                          3,4], dtype: :float64)
  #   a.pinv # => [ [-2.0000000000000018, 1.0000000000000007]
  #                 [1.5000000000000016, -0.5000000000000008] ]
  #
  #   b = NMatrix.new([4,1],[1,2,3,4], dtype: :float64)
  #   b.pinv # => [ [ 0.03333333, 0.06666667, 0.99999999, 0.13333333] ]
  #
  # == References
  #
  # * https://en.wikipedia.org/wiki/Moore%E2%80%93Penrose_pseudoinverse
  # * G. Strang, Linear Algebra and Its Applications, 2nd Ed., Orlando, FL, Academic Press
  #
  def pinv(tolerance = 1e-15)
    raise DataTypeError, "pinv works only with matrices of float or complex data type" unless
      [:float32, :float64, :complex64, :complex128].include?(dtype)
    if self.complex_dtype?
      u, s, vt = self.complex_conjugate.gesvd # singular value decomposition
    else
      u, s, vt = self.gesvd
    end
    rows = self.shape[0]
    cols = self.shape[1]
    if rows < cols
      u_reduced = u
      vt_reduced = vt[0..rows - 1, 0..cols - 1].transpose
    else
      u_reduced = u[0..rows - 1, 0..cols - 1]
      vt_reduced = vt.transpose
    end
    largest_singular_value = s.max.to_f
    cutoff = tolerance * largest_singular_value
    # Invert singular values above the cutoff; zero out the rest.
    (0...[rows, cols].min).each do |i|
      s[i] = 1 / s[i] if s[i] > cutoff
      s[i] = 0 if s[i] <= cutoff
    end
    multiplier = u_reduced.dot(NMatrix.diagonal(s.to_a)).transpose
    vt_reduced.dot(multiplier)
  end
  alias :pseudo_inverse :pinv
  alias :pseudoinverse :pinv

  #
  # call-seq:
  #     adjugate! -> NMatrix
  #
  # Calculate the adjugate of the matrix (in-place).
  # Only works on dense matrices.
  #
  # * *Raises* :
  #   - +StorageTypeError+ -> only implemented on dense matrices.
  #   - +ShapeError+ -> matrix must be square.
  #   - +DataTypeError+ -> cannot calculate adjugate of an integer matrix in-place.
  #
  def adjugate!
    raise(StorageTypeError, "adjugate only works on dense matrices currently") unless self.dense?
    raise(ShapeError, "Cannot calculate adjugate of a non-square matrix") unless self.dim == 2 && self.shape[0] == self.shape[1]
    raise(DataTypeError, "Cannot calculate adjugate of an integer matrix in-place") if self.integer_dtype?
    # adj(A) = det(A) * inv(A)
    d = self.det
    self.invert!
    self.map! { |e| e * d }
    self
  end
  alias :adjoint! :adjugate!

  #
  # call-seq:
  #     adjugate -> NMatrix
  #
  # Make a copy of the matrix and calculate the adjugate of the matrix.
  # Only works on dense matrices.
  #
  # * *Returns* :
  #   - A dense NMatrix. Will be the same type as the input NMatrix,
  #   except if the input is an integral dtype, in which case it will be a
  #   :float64 NMatrix.
  #
  # * *Raises* :
  #   - +StorageTypeError+ -> only implemented on dense matrices.
  #   - +ShapeError+ -> matrix must be square.
  #
  def adjugate
    raise(StorageTypeError, "adjugate only works on dense matrices currently") unless self.dense?
    raise(ShapeError, "Cannot calculate adjugate of a non-square matrix") unless self.dim == 2 && self.shape[0] == self.shape[1]
    d = self.det
    mat = self.invert
    mat.map! { |e| e * d }
    mat
  end
  alias :adjoint :adjugate

  # Reduce self to upper hessenberg form using householder transforms.
  #
  # == References
  #
  # * http://en.wikipedia.org/wiki/Hessenberg_matrix
  # * http://www.mymathlib.com/c_source/matrices/eigen/hessenberg_orthog.c
  def hessenberg
    clone.hessenberg!
  end

  # Destructive version of #hessenberg
  def hessenberg!
    raise ShapeError, "Trying to reduce non 2D matrix to hessenberg form" if shape.size != 2
    raise ShapeError, "Trying to reduce non-square matrix to hessenberg form" if shape[0] != shape[1]
    raise StorageTypeError, "Matrix must be dense" if stype != :dense
    raise TypeError, "Works with float matrices only" unless [:float64,:float32].include?(dtype)
    __hessenberg__(self)
    self
  end

  # call-seq:
  #     matrix_norm -> Numeric
  #
  # Calculates the selected norm (defaults to 2-norm) of a 2D matrix.
  #
  # This should be used for small or medium sized matrices.
  # For greater matrices, there should be a separate implementation where
  # the norm is estimated rather than computed, for the sake of computation speed.
  #
  # Currently implemented norms are 1-norm, 2-norm, Frobenius, Infinity.
  # A minus on the 1, 2 and inf norms returns the minimum instead of the maximum value.
  #
  # Tested mainly with dense matrices. Further checks and modifications might
  # be necessary for sparse matrices.
  #
  # * *Returns* :
  #   - The selected norm of the matrix.
  # * *Raises* :
  #   - +NotImplementedError+ -> norm can be calculated only for 2D matrices
  #   - +ArgumentError+ -> unrecognized norm
  #
  def matrix_norm type = 2
    raise(NotImplementedError, "norm can be calculated only for 2D matrices") unless self.dim == 2
    raise(NotImplementedError, "norm only implemented for dense storage") unless self.stype == :dense
    raise(ArgumentError, "norm not defined for byte dtype") if self.dtype == :byte
    case type
    when nil, 2, -2
      return self.two_matrix_norm (type == -2)
    when 1, -1
      return self.one_matrix_norm (type == -1)
    when :frobenius, :fro
      return self.fro_matrix_norm
    when :infinity, :inf, :'-inf', :'-infinity'
      return self.inf_matrix_norm (type == :'-inf' || type == :'-infinity')
    else
      raise ArgumentError.new("argument must be a valid integer or symbol")
    end
  end

  # Calculate the variance co-variance matrix
  #
  # == Options
  #
  # * +:for_sample_data+ - Default true. If set to false will consider the denominator for
  #   population data (i.e. N, as opposed to N-1 for sample data).
  #
  # == References
  #
  # * http://stattrek.com/matrix-algebra/covariance-matrix.aspx
  def cov(opts={})
    raise TypeError, "Only works for non-integer dtypes" if integer_dtype?
    opts = {
      for_sample_data: true
    }.merge(opts)

    denominator      = opts[:for_sample_data] ? rows - 1 : rows
    ones             = NMatrix.ones [rows,1]
    # Subtract the column means (ones * ones' * self / rows) to center the data.
    deviation_scores = self - ones.dot(ones.transpose).dot(self) / rows
    deviation_scores.transpose.dot(deviation_scores) / denominator
  end

  # Calculate the correlation matrix.
  def corr
    raise NotImplementedError, "Does not work for complex dtypes" if complex_dtype?
    standard_deviation = std
    cov / (standard_deviation.transpose.dot(standard_deviation))
  end

  # Raise a square matrix to a power. Be careful of numeric overflows!
  # In case *n* is 0, an identity matrix of the same dimension is returned. In case
  # of negative *n*, the matrix is inverted and the absolute value of *n* taken
  # for computing the power.
  #
  # == Arguments
  #
  # * +n+ - Integer to which self is to be raised.
  #
  # == References
  #
  # * R.G Dromey - How to Solve it by Computer.  Link -
  #   http://www.amazon.com/Solve-Computer-Prentice-Hall-International-Science/dp/0134340019/ref=sr_1_1?ie=UTF8&qid=1422605572&sr=8-1&keywords=how+to+solve+it+by+computer
  def pow n
    raise ShapeError, "Only works with 2D square matrices." if
      shape[0] != shape[1] or shape.size != 2
    raise TypeError, "Only works with integer powers" unless n.is_a?(Integer)

    sequence = (integer_dtype? ? self.cast(dtype: :int64) : self).clone
    product = NMatrix.eye shape[0], dtype: sequence.dtype, stype: sequence.stype

    if n == 0
      return NMatrix.eye(shape, dtype: dtype, stype: stype)
    elsif n == 1
      return sequence
    elsif n < 0
      n = n.abs
      sequence.invert!
      product = NMatrix.eye shape[0], dtype: sequence.dtype, stype: sequence.stype
    end

    # Decompose n to reduce the number of multiplications.
    # (Binary exponentiation: square-and-multiply.)
    while n > 0
      product = product.dot(sequence) if n % 2 == 1
      n = n / 2
      sequence = sequence.dot(sequence)
    end

    product
  end

  # Compute the Kronecker product of +self+ and other NMatrix
  #
  # === Arguments
  #
  #   * +mat+ - A 2D NMatrix object
  #
  # === Usage
  #
  #  a = NMatrix.new([2,2],[1,2,
  #                         3,4])
  #  b = NMatrix.new([2,3],[1,1,1,
  #                         1,1,1], dtype: :float64)
  #  a.kron_prod(b) # => [ [1.0, 1.0, 1.0, 2.0, 2.0, 2.0]
  #                        [1.0, 1.0, 1.0, 2.0, 2.0, 2.0]
  #                        [3.0, 3.0, 3.0, 4.0, 4.0, 4.0]
  #                        [3.0, 3.0, 3.0, 4.0, 4.0, 4.0] ]
  #
  def kron_prod(mat)
    unless self.dimensions==2 and mat.dimensions==2
      raise ShapeError, "Implemented for 2D NMatrix objects only."
    end

    # compute the shape [n,m] of the product matrix
    n, m = self.shape[0]*mat.shape[0], self.shape[1]*mat.shape[1]
    # compute the entries of the product matrix
    kron_prod_array = []
    if self.yale?
      # +:yale+ requires to get the row by copy in order to apply +#transpose+ to it
      self.each_row(getby=:copy) do |selfr|
        mat.each_row do |matr|
          kron_prod_array += (selfr.transpose.dot matr).to_flat_a
        end
      end
    else
      self.each_row do |selfr|
        mat.each_row do |matr|
          kron_prod_array += (selfr.transpose.dot matr).to_flat_a
        end
      end
    end

    NMatrix.new([n,m], kron_prod_array)
  end

  #
  # call-seq:
  #     trace -> Numeric
  #
  # Calculates the trace of an nxn matrix.
  #
  # * *Raises* :
  #   - +ShapeError+ -> Expected square matrix
  #
  # * *Returns* :
  #   - The trace of the matrix (a numeric value)
  #
  def trace
    raise(ShapeError, "Expected square matrix") unless self.shape[0] == self.shape[1] && self.dim == 2

    (0...self.shape[0]).inject(0) do |total,i|
      total + self[i,i]
    end
  end

  ##
  # call-seq:
  #   mean() -> NMatrix
  #   mean(dimen) -> NMatrix
  #
  # Calculates the mean along the specified dimension.
  #
  # This will force integer types to float64 dtype.
  #
  # @see #inject_rank
  #
  def mean(dimen=0)
    reduce_dtype = nil
    if integer_dtype? then
      reduce_dtype = :float64
    end
    inject_rank(dimen, 0.0, reduce_dtype) do |mean, sub_mat|
      mean + sub_mat
    end / shape[dimen]
  end

  ##
  # call-seq:
  #   sum() -> NMatrix
  #   cumsum() -> NMatrix
  #   sum(dimen) -> NMatrix
  #   cumsum(dimen) -> NMatrix
  #
  # Calculates the sum along the specified dimension.
  #
  # NOTE(review): :cumsum is a straight alias of #sum here (a total, not a
  # running cumulative sum).
  #
  # @see #inject_rank
  def sum(dimen=0)
    inject_rank(dimen, 0.0) do |sum, sub_mat|
      sum + sub_mat
    end
  end
  alias :cumsum :sum

  ##
  # call-seq:
  #   min() -> NMatrix
  #   min(dimen) -> NMatrix
  #
  # Calculates the minimum along the specified dimension.
  #
  # @see #inject_rank
  #
  def min(dimen=0)
    inject_rank(dimen) do |min, sub_mat|
      if min.is_a? NMatrix then
        # Element-wise select: keep min where min <= sub_mat, else sub_mat.
        min * (min <= sub_mat).cast(self.stype, self.dtype) + ((min)*0.0 + (min > sub_mat).cast(self.stype, self.dtype)) * sub_mat
      else
        min <= sub_mat ? min : sub_mat
      end
    end
  end

  ##
  # call-seq:
  #   max() -> NMatrix
  #   max(dimen) -> NMatrix
  #
  # Calculates the maximum along the specified dimension.
  #
  # @see #inject_rank
  #
  def max(dimen=0)
    inject_rank(dimen) do |max, sub_mat|
      if max.is_a? NMatrix then
        # Element-wise select: keep max where max >= sub_mat, else sub_mat.
        max * (max >= sub_mat).cast(self.stype, self.dtype) + ((max)*0.0 + (max < sub_mat).cast(self.stype, self.dtype)) * sub_mat
      else
        max >= sub_mat ? max : sub_mat
      end
    end
  end

  ##
  # call-seq:
  #   variance() -> NMatrix
  #   variance(dimen) -> NMatrix
  #
  # Calculates the sample variance along the specified dimension.
  #
  # This will force integer types to float64 dtype.
  #
  # @see #inject_rank
  #
  def variance(dimen=0)
    reduce_dtype = nil
    if integer_dtype? then
      reduce_dtype = :float64
    end
    m = mean(dimen)
    inject_rank(dimen, 0.0, reduce_dtype) do |var, sub_mat|
      var + (m - sub_mat)*(m - sub_mat)/(shape[dimen]-1)
    end
  end

  ##
  # call-seq:
  #   std() -> NMatrix
  #   std(dimen) -> NMatrix
  #
  # Calculates the sample standard deviation along the specified dimension.
  #
  # This will force integer types to float64 dtype.
  #
  # @see #inject_rank
  #
  def std(dimen=0)
    variance(dimen).sqrt
  end

  #
  # call-seq:
  #     abs_dtype -> Symbol
  #
  # Returns the dtype of the result of a call to #abs. In most cases, this is the same as dtype; it should only differ
  # for :complex64 (where it's :float32) and :complex128 (:float64).
  def abs_dtype
    if self.dtype == :complex64
      :float32
    elsif self.dtype == :complex128
      :float64
    else
      self.dtype
    end
  end

  #
  # call-seq:
  #     abs -> NMatrix
  #
  # Maps all values in a matrix to their absolute values.
  def abs
    if stype == :dense
      self.__dense_map__ { |v| v.abs }
    elsif stype == :list
      # FIXME: Need __list_map_stored__, but this will do for now.
      self.__list_map_merged_stored__(nil, nil) { |v,dummy| v.abs }
    else
      self.__yale_map_stored__ { |v| v.abs }
    end.cast(self.stype, abs_dtype)
  end

  # Norm calculation methods
  # Frobenius norm: the Euclidean norm of the matrix, treated as if it were a vector
  def fro_matrix_norm
    #float64 has to be used in any case, since nrm2 will not yield correct result for float32
    self_cast = self.cast(:dtype => :float64)

    column_vector = self_cast.reshape([self.size, 1])

    return column_vector.nrm2
  end

  # 2-norm: the largest/smallest singular value of the matrix
  def two_matrix_norm minus = false

    self_cast = self.cast(:dtype => :float64)

    #TODO: confirm if this is the desired svd calculation
    svd = self_cast.gesvd
    return svd[1][0, 0] unless minus
    return svd[1][svd[1].rows-1, svd[1].cols-1]
  end

  # 1-norm: the maximum/minimum absolute column sum of the matrix
  def one_matrix_norm minus = false
    #TODO: change traversing method for sparse matrices
    number_of_columns = self.cols
    col_sums = []

    number_of_columns.times do |i|
      col_sums << self.col(i).inject(0) { |sum, number| sum += number.abs}
    end

    return col_sums.max unless minus
    return col_sums.min
  end

  # Infinity norm: the maximum/minimum absolute row sum of the matrix
  def inf_matrix_norm minus = false
    number_of_rows = self.rows
    row_sums = []

    number_of_rows.times do |i|
      row_sums << self.row(i).inject(0) { |sum, number| sum += number.abs}
    end

    return row_sums.max unless minus
    return row_sums.min
  end

  #
  # call-seq:
  #     positive_definite? -> boolean
  #
  # A matrix is positive definite if it's symmetric and all its eigenvalues are positive
  #
  # * *Returns* :
  #   - A boolean value telling if the NMatrix is positive definite or not.
  # * *Raises* :
  #   - +ShapeError+ -> Must be used on square matrices.
  #
  def positive_definite?
    raise(ShapeError, "positive definite calculated only for square matrices") unless
      self.dim == 2 && self.shape[0] == self.shape[1]
    # Sylvester's criterion: every leading principal minor must be positive.
    cond = 0
    while cond != self.cols
      if self[0..cond, 0..cond].det <= 0
        return false
      end
      cond += 1
    end
    true
  end

  #
  # call-seq:
  #     svd_rank() -> int
  #     svd_rank(tolerence) -> int
  #
  # Gives rank of the matrix based on the singular value decomposition.
  # The rank of a matrix is computed as the number of diagonal elements in Sigma that are larger than a tolerance
  #
  # * *Returns* :
  #   - An integer equal to the rank of the matrix
  # * *Raises* :
  #   - +ShapeError+ -> Is only computable on 2-D matrices
  #
  def svd_rank(tolerence="default")
    raise(ShapeError, "rank calculated only for 2-D matrices") unless
      self.dim == 2

    sigmas = self.gesvd[1].to_a.flatten
    eps = NMatrix::FLOAT64_EPSILON
    # epsilon depends on the width of the number
    if (self.dtype == :float32 || self.dtype == :complex64)
      eps = NMatrix::FLOAT32_EPSILON
    end

    case tolerence
    when "default"
      # tolerence of a Matrix A is max(size(A))*eps(norm(A)). norm(A) is nearly equal to max(sigma of A)
      tolerence = self.shape.max * sigmas.max * eps
    end

    return sigmas.map { |x| x > tolerence ? 1 : 0 }.reduce(:+)
  end

  protected

  # Define the element-wise operations for lists. Note that the __list_map_merged_stored__ iterator returns a Ruby Object
  # matrix, which we then cast back to the appropriate type.  If you don't want that, you can redefine these functions in
  # your own code.
{add: :+, sub: :-, mul: :*, div: :/, pow: :**, mod: :%}.each_pair do |ewop, op| define_method("__list_elementwise_#{ewop}__") do |rhs| self.__list_map_merged_stored__(rhs, nil) { |l,r| l.send(op,r) }.cast(stype, NMatrix.upcast(dtype, rhs.dtype)) end define_method("__dense_elementwise_#{ewop}__") do |rhs| self.__dense_map_pair__(rhs) { |l,r| l.send(op,r) }.cast(stype, NMatrix.upcast(dtype, rhs.dtype)) end define_method("__yale_elementwise_#{ewop}__") do |rhs| self.__yale_map_merged_stored__(rhs, nil) { |l,r| l.send(op,r) }.cast(stype, NMatrix.upcast(dtype, rhs.dtype)) end define_method("__list_scalar_#{ewop}__") do |rhs| self.__list_map_merged_stored__(rhs, nil) { |l,r| l.send(op,r) }.cast(stype, NMatrix.upcast(dtype, NMatrix.min_dtype(rhs))) end define_method("__yale_scalar_#{ewop}__") do |rhs| self.__yale_map_stored__ { |l| l.send(op,rhs) }.cast(stype, NMatrix.upcast(dtype, NMatrix.min_dtype(rhs))) end define_method("__dense_scalar_#{ewop}__") do |rhs| self.__dense_map__ { |l| l.send(op,rhs) }.cast(stype, NMatrix.upcast(dtype, NMatrix.min_dtype(rhs))) end end # These don't actually take an argument -- they're called reverse-polish style on the matrix. # This group always gets casted to float64. [:log, :log2, :log10, :sqrt, :sin, :cos, :tan, :acos, :asin, :atan, :cosh, :sinh, :tanh, :acosh, :asinh, :atanh, :exp, :erf, :erfc, :gamma, :cbrt, :round].each do |ewop| define_method("__list_unary_#{ewop}__") do self.__list_map_stored__(nil) { |l| Math.send(ewop, l) }.cast(stype, NMatrix.upcast(dtype, :float64)) end define_method("__yale_unary_#{ewop}__") do self.__yale_map_stored__ { |l| Math.send(ewop, l) }.cast(stype, NMatrix.upcast(dtype, :float64)) end define_method("__dense_unary_#{ewop}__") do self.__dense_map__ { |l| Math.send(ewop, l) }.cast(stype, NMatrix.upcast(dtype, :float64)) end end #:stopdoc: # log takes an optional single argument, the base. Default to natural log. 
  def __list_unary_log__(base)
    self.__list_map_stored__(nil) { |l| Math.log(l, base) }.cast(stype, NMatrix.upcast(dtype, :float64))
  end

  def __yale_unary_log__(base)
    self.__yale_map_stored__ { |l| Math.log(l, base) }.cast(stype, NMatrix.upcast(dtype, :float64))
  end

  def __dense_unary_log__(base)
    self.__dense_map__ { |l| Math.log(l, base) }.cast(stype, NMatrix.upcast(dtype, :float64))
  end

  # These are for negating matrix contents using -@
  def __list_unary_negate__
    self.__list_map_stored__(nil) { |l| -l }.cast(stype, dtype)
  end

  def __yale_unary_negate__
    self.__yale_map_stored__ { |l| -l }.cast(stype, dtype)
  end

  def __dense_unary_negate__
    self.__dense_map__ { |l| -l }.cast(stype, dtype)
  end
  #:startdoc:

  # These are for rounding each value of a matrix. Takes an optional argument
  # (the number of digits of precision). Complex values are rounded
  # component-wise (real and imaginary parts separately).
  def __list_unary_round__(precision)
    if self.complex_dtype?
      self.__list_map_stored__(nil) { |l| Complex(l.real.round(precision), l.imag.round(precision)) }
        .cast(stype, dtype)
    else
      self.__list_map_stored__(nil) { |l| l.round(precision) }.cast(stype, dtype)
    end
  end

  def __yale_unary_round__(precision)
    if self.complex_dtype?
      self.__yale_map_stored__ { |l| Complex(l.real.round(precision), l.imag.round(precision)) }
        .cast(stype, dtype)
    else
      self.__yale_map_stored__ { |l| l.round(precision) }.cast(stype, dtype)
    end
  end

  def __dense_unary_round__(precision)
    if self.complex_dtype?
      self.__dense_map__ { |l| Complex(l.real.round(precision), l.imag.round(precision)) }
        .cast(stype, dtype)
    else
      self.__dense_map__ { |l| l.round(precision) }.cast(stype, dtype)
    end
  end

  # These are for calculating the floor or ceil of matrix
  #
  # Chooses the result dtype: integer, complex, and object matrices keep
  # their own dtype, while float matrices produce :int64.
  def dtype_for_floor_or_ceil
    if self.integer_dtype? or [:complex64, :complex128, :object].include?(self.dtype)
      return_dtype = dtype
    elsif [:float32, :float64].include?(self.dtype)
      return_dtype = :int64
    end

    return_dtype
  end

  # Generate floor/ceil for each storage type; complex values are handled
  # component-wise since Complex itself has no #floor/#ceil.
  [:floor, :ceil].each do |meth|
    define_method("__list_unary_#{meth}__") do
      return_dtype = dtype_for_floor_or_ceil

      if [:complex64, :complex128].include?(self.dtype)
        self.__list_map_stored__(nil) { |l| Complex(l.real.send(meth), l.imag.send(meth)) }.cast(stype, return_dtype)
      else
        self.__list_map_stored__(nil) { |l| l.send(meth) }.cast(stype, return_dtype)
      end
    end

    define_method("__yale_unary_#{meth}__") do
      return_dtype = dtype_for_floor_or_ceil

      if [:complex64, :complex128].include?(self.dtype)
        self.__yale_map_stored__ { |l| Complex(l.real.send(meth), l.imag.send(meth)) }.cast(stype, return_dtype)
      else
        self.__yale_map_stored__ { |l| l.send(meth) }.cast(stype, return_dtype)
      end
    end

    define_method("__dense_unary_#{meth}__") do
      return_dtype = dtype_for_floor_or_ceil

      if [:complex64, :complex128].include?(self.dtype)
        self.__dense_map__ { |l| Complex(l.real.send(meth), l.imag.send(meth)) }.cast(stype, return_dtype)
      else
        self.__dense_map__ { |l| l.send(meth) }.cast(stype, return_dtype)
      end
    end
  end

  # These take two arguments. One might be a matrix, and one might be a scalar.
# See also monkeys.rb, which contains Math module patches to let the first # arg be a scalar [:atan2, :ldexp, :hypot].each do |ewop| define_method("__list_elementwise_#{ewop}__") do |rhs,order| if order then self.__list_map_merged_stored__(rhs, nil) { |r,l| Math.send(ewop,l,r) } else self.__list_map_merged_stored__(rhs, nil) { |l,r| Math.send(ewop,l,r) } end.cast(stype, NMatrix.upcast(dtype, :float64)) end define_method("__dense_elementwise_#{ewop}__") do |rhs, order| if order then self.__dense_map_pair__(rhs) { |r,l| Math.send(ewop,l,r) } else self.__dense_map_pair__(rhs) { |l,r| Math.send(ewop,l,r) } end.cast(stype, NMatrix.upcast(dtype, :float64)) end define_method("__yale_elementwise_#{ewop}__") do |rhs, order| if order then self.__yale_map_merged_stored__(rhs, nil) { |r,l| Math.send(ewop,l,r) } else self.__yale_map_merged_stored__(rhs, nil) { |l,r| Math.send(ewop,l,r) } end.cast(stype, NMatrix.upcast(dtype, :float64)) end define_method("__list_scalar_#{ewop}__") do |rhs,order| if order then self.__list_map_stored__(nil) { |l| Math.send(ewop, rhs, l) } else self.__list_map_stored__(nil) { |l| Math.send(ewop, l, rhs) } end.cast(stype, NMatrix.upcast(dtype, :float64)) end define_method("__yale_scalar_#{ewop}__") do |rhs,order| if order then self.__yale_map_stored__ { |l| Math.send(ewop, rhs, l) } else self.__yale_map_stored__ { |l| Math.send(ewop, l, rhs) } end.cast(stype, NMatrix.upcast(dtype, :float64)) end define_method("__dense_scalar_#{ewop}__") do |rhs,order| if order self.__dense_map__ { |l| Math.send(ewop, rhs, l) } else self.__dense_map__ { |l| Math.send(ewop, l, rhs) } end.cast(stype, NMatrix.upcast(dtype, :float64)) end end # Equality operators do not involve a cast. We want to get back matrices of TrueClass and FalseClass. 
{eqeq: :==, neq: :!=, lt: :<, gt: :>, leq: :<=, geq: :>=}.each_pair do |ewop, op| define_method("__list_elementwise_#{ewop}__") do |rhs| self.__list_map_merged_stored__(rhs, nil) { |l,r| l.send(op,r) } end define_method("__dense_elementwise_#{ewop}__") do |rhs| self.__dense_map_pair__(rhs) { |l,r| l.send(op,r) } end define_method("__yale_elementwise_#{ewop}__") do |rhs| self.__yale_map_merged_stored__(rhs, nil) { |l,r| l.send(op,r) } end define_method("__list_scalar_#{ewop}__") do |rhs| self.__list_map_merged_stored__(rhs, nil) { |l,r| l.send(op,r) } end define_method("__yale_scalar_#{ewop}__") do |rhs| self.__yale_map_stored__ { |l| l.send(op,rhs) } end define_method("__dense_scalar_#{ewop}__") do |rhs| self.__dense_map__ { |l| l.send(op,rhs) } end end end if jruby? require_relative "./jruby/math.rb" else require_relative "./cruby/math.rb" end ================================================ FILE: lib/nmatrix/mkmf.rb ================================================ require "mkmf" if RUBY_VERSION < '1.9' raise NotImplementedError, "Sorry, you need at least Ruby 1.9!" end # Function derived from NArray's extconf.rb. def create_conf_h(file) #:nodoc: print "creating #{file}\n" File.open(file, 'w') do |hfile| header_guard = file.upcase.sub(/\s|\./, '_') hfile.puts "#ifndef #{header_guard}" hfile.puts "#define #{header_guard}" hfile.puts # FIXME: Find a better way to do this: hfile.puts "#define RUBY_2 1" if RUBY_VERSION >= '2.0' for line in $defs line =~ /^-D(.*)/ hfile.printf "#define %s 1\n", $1 end hfile.puts hfile.puts "#endif" end end def find_newer_gplusplus #:nodoc: print "checking for apparent GNU g++ binary with C++0x/C++11 support... " [9,8,7,6,5,4,3].each do |minor| ver = "4.#{minor}" gpp = "g++-#{ver}" result = `which #{gpp}` next if result.empty? 
CONFIG['CXX'] = gpp puts ver return CONFIG['CXX'] end false end def gplusplus_version cxxvar = proc { |n| `#{CONFIG['CXX']} -E -dM - <#{File::NULL} | grep #{n}`.chomp.split(' ')[2] } major = cxxvar.call('__GNUC__') minor = cxxvar.call('__GNUC_MINOR__') patch = cxxvar.call('__GNUC_PATCHLEVEL__') raise("unable to determine g++ version (match to get version was nil)") if major.nil? || minor.nil? || patch.nil? "#{major}.#{minor}.#{patch}" end if /cygwin|mingw/ =~ RUBY_PLATFORM CONFIG["DLDFLAGS"] << " --output-lib libnmatrix.a" end # Fix compiler pairing if CONFIG['CC'] == 'clang' && CONFIG['CXX'] != 'clang++' puts "WARNING: CONFIG['CXX'] is not 'clang++' even though CONFIG['CC'] is 'clang'.", "WARNING: Force to use clang++ together with clang." CONFIG['CXX'] = 'clang++' end if CONFIG['CXX'] == 'clang++' $CXX_STANDARD = 'c++11' else version = gplusplus_version if version < '4.3.0' && CONFIG['CXX'] == 'g++' # see if we can find a newer G++, unless it's been overridden by user if !find_newer_gplusplus raise("You need a version of g++ which supports -std=c++0x or -std=c++11. If you're on a Mac and using Homebrew, we recommend using mac-brew-gcc.sh to install a more recent g++.") end version = gplusplus_version end if version < '4.7.0' $CXX_STANDARD = 'c++0x' else $CXX_STANDARD = 'c++11' end puts "using C++ standard... #{$CXX_STANDARD}" puts "g++ reports version... " + `#{CONFIG['CXX']} --version|head -n 1|cut -f 3 -d " "` end # For release, these next two should both be changed to -O3. 
$CFLAGS += " -O3 " #$CFLAGS += " -static -O0 -g " $CXXFLAGS += " -O3 -std=#{$CXX_STANDARD} " #-fmax-errors=10 -save-temps #$CXXFLAGS += " -static -O0 -g -std=#{$CXX_STANDARD} " if CONFIG.has_key?('warnflags') CONFIG['warnflags'].gsub!('-Wshorten-64-to-32', '') # doesn't work except in Mac-patched gcc (4.2) CONFIG['warnflags'].gsub!('-Wdeclaration-after-statement', '') CONFIG['warnflags'].gsub!('-Wimplicit-function-declaration', '') end have_func("rb_array_const_ptr", "ruby.h") ================================================ FILE: lib/nmatrix/monkeys.rb ================================================ #-- # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == monkeys.rb # # Ruby core extensions for NMatrix. #++ ####################### # Classes and Modules # ####################### class Array # Convert a Ruby Array to an NMatrix. # # You must provide a shape for the matrix as the first argument. # # == Arguments: # shape :: Array describing matrix dimensions (or Integer for square). # If not provided, will be intuited through #shape. # dtype :: Override data type (e.g., to store a Float as :float32 # instead of :float64) -- optional. 
# stype :: Optional storage type (defaults to :dense) def to_nm(shape = nil, dtype = nil, stype = :dense) elements = self.dup guess_dtype = ->(type) { case type when Integer then :int64 when Float then :float64 when Complex then :complex128 end } guess_shape = lambda { |shapey; shape| # Get the size of the current dimension shape = [shapey.size] shape << shapey.map {|s| if s.respond_to?(:size) && s.respond_to?(:map) guess_shape.call(s) else nil end } if shape.last.any? {|s| (s != shape.last.first) || s.nil?} shape.pop end if (shape.first != shape.last) && shape.last.all? {|s| s == shape.last.first} shape[-1] = shape.last.first end shape.flatten } unless shape shape = guess_shape.call(elements) elements.flatten!(shape.size - 1) if elements.flatten != elements dtype = :object else dtype ||= guess_dtype[elements[0]] end end dtype ||= guess_dtype[self[0]] matrix = NMatrix.new(:dense, shape, elements, dtype) if stype != :dense then matrix.cast(stype, dtype) else matrix end end end class Object #:nodoc: def returning(value) yield(value) value end end module Math #:nodoc: class << self NMatrix::NMMath::METHODS_ARITY_2.each do |meth| define_method "nm_#{meth}" do |arg0, arg1| if arg0.is_a? NMatrix then arg0.send(meth, arg1) elsif arg1.is_a? NMatrix then arg1.send(meth, arg0, true) else self.send("old_#{meth}".to_sym, arg0, arg1) end end alias_method "old_#{meth}".to_sym, meth alias_method meth, "nm_#{meth}".to_sym end end end class String def underscore self.gsub(/::/, '/'). gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2'). gsub(/([a-z\d])([A-Z])/,'\1_\2'). tr("-", "_"). downcase end end # Since `autoload` will most likely be deprecated (due to multi-threading concerns), # we'll use `const_missing`. See: https://www.ruby-forum.com/topic/3036681 for more info. 
module AutoloadPatch #:nodoc:
  # On first reference to a missing constant, require the matching
  # nmatrix/io/<name> file (CamelCase -> snake_case) and return the constant
  # it defines.
  def const_missing(name)
    file = name.to_s.underscore
    require "nmatrix/io/#{file}"
    klass = const_get(name)
    return klass if klass
  end
end


================================================
FILE: lib/nmatrix/nmatrix.rb
================================================
#--
# = NMatrix
#
# A linear algebra library for scientific computation in Ruby.
# NMatrix is part of SciRuby.
#
# NMatrix was originally inspired by and derived from NArray, by
# Masahiro Tanaka: http://narray.rubyforge.org
#
# == Copyright Information
#
# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
#
# Please see LICENSE.txt for additional copyright notices.
#
# == Contributing
#
# By contributing source code to SciRuby, you agree to be bound by
# our Contributor Agreement:
#
# * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
#
# == nmatrix.rb
#
# This file loads the C extension for NMatrix and all the ruby
# files and contains those core functionalities which can be
# implemented efficiently (or much more easily) in Ruby (e.g.,
# inspect, pretty_print, element-wise operations).
#++

# For some reason nmatrix.so ends up in a different place during gem build.

# Detect java
def jruby?
  /java/ === RUBY_PLATFORM
end

if jruby?
require_relative 'jruby/nmatrix_java' else if File.exist?("lib/nmatrix/nmatrix.so") #|| File.exist?("lib/nmatrix/nmatrix.bundle") # Development require_relative "nmatrix/nmatrix.so" else # Gem require_relative "../nmatrix.so" require_relative './io/mat_reader' require_relative './io/mat5_reader' require_relative './io/market' require_relative './io/point_cloud' require_relative './lapack_core.rb' require_relative './yale_functions.rb' end end require_relative './math.rb' require_relative './monkeys' # NMatrix is a matrix class that supports both multidimensional arrays # (`:dense` stype) and sparse storage (`:list` or `:yale` stypes) and 13 data # types, including complex numbers, various integer and # floating-point sizes and ruby objects. class NMatrix # Read and write extensions for NMatrix. module IO extend AutoloadPatch # Reader (and eventually writer) of Matlab .mat files. # # The .mat file format is documented in the following link: # * http://www.mathworks.com/help/pdf_doc/matlab/matfile_format.pdf module Matlab extend AutoloadPatch class << self # call-seq: # load(mat_file_path) -> NMatrix # load_mat(mat_file_path) -> NMatrix # # Load a .mat file and return a NMatrix corresponding to it. def load_mat(file_path) NMatrix::IO::Matlab::Mat5Reader.new(File.open(file_path, "rb+")).to_ruby end alias :load :load_mat end end end class << self # call-seq: # load_matlab_file(path) -> Mat5Reader # # * *Arguments* : # - +file_path+ -> The path to a version 5 .mat file. # * *Returns* : # - A Mat5Reader object. def load_matlab_file(file_path) NMatrix::IO::Matlab::Mat5Reader.new(File.open(file_path, 'rb')).to_ruby end # call-seq: # load_pcd_file(path) -> PointCloudReader::MetaReader # # * *Arguments* : # - +file_path+ -> The path to a PCL PCD file. 
# * *Returns* : # - A PointCloudReader::MetaReader object with the matrix stored in its +matrix+ property def load_pcd_file(file_path) NMatrix::IO::PointCloudReader::MetaReader.new(file_path) end # Calculate the size of an NMatrix of a given shape. def size(shape) shape = [shape,shape] unless shape.is_a?(Array) (0...shape.size).inject(1) { |x,i| x * shape[i] } end # Make N-D coordinate arrays for vectorized evaluations of # N-D scalar/vector fields over N-D grids, given N # coordinate arrays arrs. N > 1. # # call-seq: # meshgrid(arrs) -> Array of NMatrix # meshgrid(arrs, options) -> Array of NMatrix # # * *Arguments* : # - +vectors+ -> Array of N coordinate arrays (Array or NMatrix), if any have more than one dimension they will be flatten # - +options+ -> Hash with options (:sparse Boolean, false by default; :indexing Symbol, may be :ij or :xy, :xy by default) # * *Returns* : # - Array of N N-D NMatrixes # * *Examples* : # x, y = NMatrix::meshgrid([[1, [2, 3]], [4, 5]]) # x.to_a #<= [[1, 2, 3], [1, 2, 3]] # y.to_a #<= [[4, 4, 4], [5, 5, 5]] # # * *Using* *options* : # # x, y = NMatrix::meshgrid([[[1, 2], 3], [4, 5]], sparse: true) # x.to_a #<= [[1, 2, 3]] # y.to_a #<= [[4], [5]] # # x, y = NMatrix::meshgrid([[1, 2, 3], [[4], 5]], indexing: :ij) # x.to_a #<= [[1, 1], [2, 2], [3, 3]] # y.to_a #<= [[4, 5], [4, 5], [4, 5]] def meshgrid(vectors, options = {}) raise(ArgumentError, 'Expected at least 2 arrays.') if vectors.size < 2 options[:indexing] ||= :xy raise(ArgumentError, 'Indexing must be :xy of :ij') unless [:ij, :xy].include? options[:indexing] mats = vectors.map { |arr| arr.respond_to?(:flatten) ? 
arr.flatten : arr.to_flat_array } mats[0], mats[1] = mats[1], mats[0] if options[:indexing] == :xy new_dim = mats.size lengths = mats.map(&:size) result = mats.map.with_index do |matrix, axis| if options[:sparse] new_shape = Array.new(new_dim, 1) new_shape[axis] = lengths[axis] new_elements = matrix else before_axis = lengths[0...axis].reduce(:*) after_axis = lengths[(axis+1)..-1].reduce(:*) new_shape = lengths new_elements = after_axis ? matrix.map{ |el| [el] * after_axis }.flatten : matrix new_elements *= before_axis if before_axis end NMatrix.new(new_shape, new_elements) end result[0], result[1] = result[1], result[0] if options[:indexing] == :xy result end end # TODO: Make this actually pretty. def pretty_print(q) #:nodoc: if self.shape.size > 1 and self.shape[1] > 100 self.inspect.pretty_print(q) elsif self.dim > 3 || self.dim == 1 self.to_a.pretty_print(q) else # iterate through the whole matrix and find the longest number longest = Array.new(self.shape[1], 0) self.each_column.with_index do |col, j| col.each do |elem| elem_len = elem.inspect.size longest[j] = elem_len if longest[j] < elem_len end end if self.dim == 3 q.group(0, "\n{ layers:", "}") do self.each_layer.with_index do |layer,k| q.group(0, "\n [\n", " ]\n") do layer.each_row.with_index do |row,i| q.group(0, " [", "]\n") do q.seplist(self[i,0...self.shape[1],k].to_flat_array, lambda { q.text ", "}, :each_with_index) { |v,j| q.text v.inspect.rjust(longest[j]) } end end end end end else # dim 2 q.group(0, "\n[\n ", "]") do self.each_row.with_index do |row, i| q.group(1, " [", "]\n") do q.seplist(row.to_a, -> { q.text ", " }, :each_with_index) do |v,j| q.text v.inspect.rjust(longest[j]) end end q.breakable unless i + 1 == self.shape[0] end end end end end # # call-seq: # cast(stype, dtype, default) -> NMatrix # cast(stype, dtype) -> NMatrix # cast(stype) -> NMatrix # cast(options) -> NMatrix # # This is a user-friendly helper for calling #cast_full. 
The easiest way to call this function is using an # options hash, e.g., # # n.cast(:stype => :yale, :dtype => :int64, :default => false) # # For list and yale, :default sets the "default value" or "init" of the matrix. List allows a bit more freedom # since non-zeros are permitted. For yale, unpredictable behavior may result if the value is not false, nil, or # some version of 0. Dense discards :default. # # dtype and stype are inferred from the matrix upon which #cast is called -- so you only really need to provide # one. You can actually call this function with no arguments, in which case it functions like #clone. # # If your dtype is :object and you are converting from :dense to a sparse type, it is recommended that you # provide a :default, as 0 may behave differently from its Float or Complex equivalent. If no option # is given, Integer 0 will be used. def cast(*params) if (params.size > 0 && params[0].is_a?(Hash)) opts = { :stype => self.stype, :dtype => self.dtype, :default => self.stype == :dense ? 0 : self.default_value }.merge(params[0]) self.cast_full(opts[:stype], opts[:dtype], opts[:default]) else params << self.stype if params.size == 0 params << self.dtype if params.size == 1 #HACK: the default value can cause an exception if dtype is not complex #and default_value is. (The ruby C code apparently won't convert these.) #Perhaps this should be fixed in the C code (in rubyval_to_cval). default_value = maybe_get_noncomplex_default_value(params[1]) params << (self.stype == :dense ? 0 : default_value) if params.size == 2 self.cast_full(*params) end end # # call-seq: # rows -> Integer # # This shortcut use #shape to return the number of rows (the first dimension) # of the matrix. # def rows shape[0] end # # call-seq: # cols -> Integer # # This shortcut use #shape to return the number of columns (the second # dimension) of the matrix. # def cols shape[1] end # Return the main diagonal or antidiagonal a matrix. Only works with 2D matrices. 
# # == Arguments # # * +main_diagonal+ - Defaults to true. If passed 'false', then will return the # antidiagonal of the matrix. # # == References # # * http://en.wikipedia.org/wiki/Main_diagonal def diagonal main_diagonal=true diag_size = [cols, rows].min diag = NMatrix.new [diag_size], dtype: dtype if main_diagonal 0.upto(diag_size-1) do |i| diag[i] = self[i,i] end else row = 0 (diag_size-1).downto(0) do |col| diag[row] = self[row,col] row += 1 end end diag end # # call-seq: # to_hash -> Hash # # Create a Ruby Hash from an NMatrix. # def to_hash if stype == :yale h = {} each_stored_with_indices do |val,i,j| next if val == 0 # Don't bother storing the diagonal zero values -- only non-zeros. if h.has_key?(i) h[i][j] = val else h[i] = {j => val} end end h else # dense and list should use a C internal function. # FIXME: Write a C internal to_h function. m = stype == :dense ? self.cast(:list, self.dtype) : self m.__list_to_hash__ end end alias :to_h :to_hash def inspect #:nodoc: original_inspect = super() original_inspect = original_inspect[0...original_inspect.size-1] original_inspect + " " + inspect_helper.join(" ") + ">" end def __yale_ary__to_s(sym) #:nodoc: ary = self.send("__yale_#{sym.to_s}__".to_sym) '[' + ary.collect { |a| a ? a : 'nil'}.join(',') + ']' end # call-seq: # integer_dtype?() -> Boolean # # Checks if dtype is an integer type # def integer_dtype? [:byte, :int8, :int16, :int32, :int64].include?(self.dtype) end # call-seq: # float_dtype?() -> Boolean # # Checks if dtype is a floating point type # def float_dtype? [:float32, :float64].include?(dtype) end ## # call-seq: # complex_dtype?() -> Boolean # # Checks if dtype is a complex type # def complex_dtype? [:complex64, :complex128].include?(self.dtype) end ## # call-seq: # # object_dtype?() -> Boolean # # Checks if dtype is a ruby object def object_dtype? dtype == :object end # # call-seq: # to_f -> Float # # Converts an nmatrix with a single element (but any number of dimensions) # to a float. 
# # Raises an IndexError if the matrix does not have just a single element. # def to_f raise IndexError, 'to_f only valid for matrices with a single element' unless shape.all? { |e| e == 1 } self[*Array.new(shape.size, 0)] end # # call-seq: # to_flat_array -> Array # to_flat_a -> Array # # Converts an NMatrix to a one-dimensional Ruby Array. # def to_flat_array ary = Array.new(self.size) self.each.with_index { |v,i| ary[i] = v } ary end alias :to_flat_a :to_flat_array # # call-seq: # size -> Integer # # Returns the total size of the NMatrix based on its shape. # def size NMatrix.size(self.shape) end def to_s #:nodoc: self.to_flat_array.to_s end # # call-seq: # nvector? -> true or false # # Shortcut function for determining whether the effective dimension is less than the dimension. # Useful when we take slices of n-dimensional matrices where n > 2. # def nvector? self.effective_dim < self.dim end # # call-seq: # vector? -> true or false # # Shortcut function for determining whether the effective dimension is 1. See also #nvector? # def vector? self.effective_dim == 1 end # # call-seq: # to_a -> Array # # Converts an NMatrix to an array of arrays, or an NMatrix of effective dimension 1 to an array. # # Does not yet work for dimensions > 2 def to_a(dimen=nil) if self.dim == 2 return self.to_flat_a if self.shape[0] == 1 ary = [] begin self.each_row do |row| ary << row.to_flat_a end #rescue NotImplementedError # Oops. Try copying instead # self.each_row(:copy) do |row| # ary << row.to_a.flatten # end end ary else to_a_rec(0) end end # # call-seq: # rank(dimension, row_or_column_number) -> NMatrix # rank(dimension, row_or_column_number, :reference) -> NMatrix reference slice # # Returns the rank (e.g., row, column, or layer) specified, using slicing by copy as default. 
# # See @row (dimension = 0), @column (dimension = 1) def rank(shape_idx, rank_idx, meth = :copy) if shape_idx > (self.dim-1) raise(RangeError, "#rank call was out of bounds") end params = Array.new(self.dim) params.each.with_index do |v,d| params[d] = d == shape_idx ? rank_idx : 0...self.shape[d] end meth == :reference ? self[*params] : self.slice(*params) end # # call-seq: # column(column_number) -> NMatrix # column(column_number, get_by) -> NMatrix # # Returns the column specified. Uses slicing by copy as default. # # * *Arguments* : # - +column_number+ -> Integer. # - +get_by+ -> Type of slicing to use, +:copy+ or +:reference+. # * *Returns* : # - A NMatrix representing the requested column as a column vector. # # Examples: # # m = NMatrix.new(2, [1, 4, 9, 14], :int32) # => 1 4 # 9 14 # # m.column(1) # => 4 # 14 # def column(column_number, get_by = :copy) rank(1, column_number, get_by) end alias :col :column # # call-seq: # row(row_number) -> NMatrix # row(row_number, get_by) -> NMatrix # # * *Arguments* : # - +row_number+ -> Integer. # - +get_by+ -> Type of slicing to use, +:copy+ or +:reference+. # * *Returns* : # - An NMatrix representing the requested row as a row vector. # def row(row_number, get_by = :copy) rank(0, row_number, get_by) end # # call-seq: # last -> Element of self.dtype # # Returns the last element stored in an NMatrix # def last self[*Array.new(self.dim, -1)] end # # call-seq: # reshape(new_shape) -> NMatrix # # Clone a matrix, changing the shape in the process. Note that this function does not do a resize; the product of # the new and old shapes' components must be equal. # # * *Arguments* : # - +new_shape+ -> Array of positive Integers. # * *Returns* : # - A copy with a different shape. 
  #
  def reshape new_shape,*shapes
    # Accept either an Array shape or a splatted list of Integer dimensions.
    if new_shape.is_a?Integer
      newer_shape = [new_shape]+shapes
    else # new_shape is an Array
      newer_shape = new_shape
    end
    t = reshape_clone_structure(newer_shape)
    left_params  = [:*]*newer_shape.size
    right_params = [:*]*self.shape.size
    # Full-range slice assignment copies all elements across shapes.
    t[*left_params] = self[*right_params]
    t
  end

  #
  # call-seq:
  #     reshape!(new_shape) -> NMatrix
  #     reshape! new_shape -> NMatrix
  #
  # Reshapes the matrix (in-place) to the desired shape. Note that this function does not do a resize; the product of
  # the new and old shapes' components must be equal.
  #
  # * *Arguments* :
  #   - +new_shape+ -> Array of positive Integer.
  #
  def reshape! new_shape,*shapes
    if self.is_ref?
      raise(ArgumentError, "This operation cannot be performed on reference slices")
    else
      if new_shape.is_a?Integer
        shape = [new_shape]+shapes
      else # new_shape is an Array
        shape = new_shape
      end
      self.reshape_bang(shape)
    end
  end

  #
  # call-seq:
  #     transpose -> NMatrix
  #     transpose(permutation) -> NMatrix
  #
  # Clone a matrix, transposing it in the process. If the matrix is two-dimensional, the permutation is taken to be [1,0]
  # automatically (switch dimension 0 with dimension 1). If the matrix is n-dimensional, you must provide a permutation
  # of +0...n+.
  #
  # * *Arguments* :
  #   - +permutation+ -> Optional Array giving a permutation.
  # * *Returns* :
  #   - A copy of the matrix, but transposed.
  #
  def transpose(permute = nil)
    if permute.nil?
      if self.dim == 1
        return self.clone
      elsif self.dim == 2
        new_shape = [self.shape[1], self.shape[0]]
      else
        raise(ArgumentError, "need permutation array of size #{self.dim}")
      end
    elsif !permute.is_a?(Array) || permute.sort.uniq != (0...self.dim).to_a
      raise(ArgumentError, "invalid permutation array")
    else
      # Figure out the new shape based on the permutation given as an argument.
      new_shape = permute.map { |p| self.shape[p] }
    end

    if self.dim > 2 # FIXME: For dense, several of these are basically equivalent to reshape.
      # Make the new data structure.
      t = self.reshape_clone_structure(new_shape)
      self.each_stored_with_indices do |v,*indices|
        p_indices = permute.map { |p| indices[p] }
        t[*p_indices] = v
      end
      t
    elsif self.list? # TODO: Need a C list transposition algorithm.
      # Make the new data structure.
      t = self.reshape_clone_structure(new_shape)
      self.each_column.with_index do |col,j|
        t[j,:*] = col.to_flat_array
      end
      t
    else # Call C versions of Yale and List transpose, which do their own copies
      if jruby?
        nmatrix = NMatrix.new :copy
        nmatrix.shape = [@shape[1],@shape[0]]
        twoDMat = self.twoDMat.transpose
        nmatrix.s = ArrayRealVector.new(ArrayGenerator.getArrayDouble(twoDMat.getData(), shape[1],shape[0]))
        return nmatrix
      else
        self.clone_transpose
      end
    end
  end

  # call-seq:
  #     matrix1.concat(*m2) -> NMatrix
  #     matrix1.concat(*m2, rank) -> NMatrix
  #     matrix1.hconcat(*m2) -> NMatrix
  #     matrix1.vconcat(*m2) -> NMatrix
  #     matrix1.dconcat(*m3) -> NMatrix
  #
  # Joins two matrices together into a new larger matrix. Attempts to determine
  # which direction to concatenate on by looking for the first common element
  # of the matrix +shape+ in reverse. In other words, concatenating two columns
  # together without supplying +rank+ will glue them into an n x 2 matrix.
  #
  # You can also use hconcat, vconcat, and dconcat for the first three ranks.
  # concat performs an hconcat when no rank argument is provided.
  #
  # The two matrices must have the same +dim+.
  #
  # * *Arguments* :
  #   - +matrices+ -> one or more matrices
  #   - +rank+ -> Integer (for rank); alternatively, may use :row, :column, or
  #     :layer for 0, 1, 2, respectively
  def concat(*matrices)
    rank = nil
    rank = matrices.pop unless matrices.last.is_a?(NMatrix)

    # Find the first matching dimension and concatenate along that (unless rank is specified)
    if rank.nil?
      rank = self.dim-1
      self.shape.reverse_each.with_index do |s,i|
        matrices.each do |m|
          if m.shape[i] != s
            rank -= 1
            break
          end
        end
      end
    elsif rank.is_a?(Symbol) # Convert to numeric
      rank = {:row => 0, :column => 1, :col => 1, :lay => 2, :layer => 2}[rank]
    end

    # Need to figure out the new shape.
    new_shape = self.shape.dup
    new_shape[rank] = matrices.inject(self.shape[rank]) { |total,m| total + m.shape[rank] }

    # Now figure out the options for constructing the concatenated matrix.
    opts = {stype: self.stype, default: self.default_value, dtype: self.dtype}
    if self.yale?
      # We can generally predict the new capacity for Yale. Subtract out the number of rows
      # for each matrix being concatenated, and then add in the number of rows for the new
      # shape. That takes care of the diagonal. The rest of the capacity is represented by
      # the non-diagonal non-default values.
      new_cap = matrices.inject(self.capacity - self.shape[0]) do |total,m|
        total + m.capacity - m.shape[0]
      end - self.shape[0] + new_shape[0]
      opts = {capacity: new_cap}.merge(opts)
    end

    # Do the actual construction.
    n = NMatrix.new(new_shape, opts)

    # Figure out where to start concatenation. We don't know where it will end,
    # because each matrix may have own size along concat dimension.
    pos = Array.new(self.dim) { 0 }

    matrices.unshift(self)
    matrices.each do |m|
      # Figure out where to start and stop the concatenation. We'll use
      # NMatrices instead of Arrays because then we can do elementwise addition.
      ranges = m.shape.map.with_index { |s,i| pos[i]...(pos[i] + s) }

      n[*ranges] = m

      # Move over by the requisite amount
      pos[rank] = pos[rank] + m.shape[rank]
    end

    n
  end

  # Horizontal concatenation with +matrices+.
  def hconcat(*matrices)
    concat(*matrices, :column)
  end

  # Vertical concatenation with +matrices+.
  def vconcat(*matrices)
    concat(*matrices, :row)
  end

  # Depth concatenation with +matrices+.
def dconcat(*matrices) concat(*matrices, :layer) end # # call-seq: # upper_triangle -> NMatrix # upper_triangle(k) -> NMatrix # triu -> NMatrix # triu(k) -> NMatrix # # Returns the upper triangular portion of a matrix. This is analogous to the +triu+ method # in MATLAB. # # * *Arguments* : # - +k+ -> Positive integer. How many extra diagonals to include in the upper triangular portion. # def upper_triangle(k = 0) raise(NotImplementedError, "only implemented for 2D matrices") if self.shape.size > 2 t = self.clone_structure (0...self.shape[0]).each do |i| if i - k < 0 t[i, :*] = self[i, :*] else t[i, 0...(i-k)] = 0 t[i, (i-k)...self.shape[1]] = self[i, (i-k)...self.shape[1]] end end t end alias :triu :upper_triangle # # call-seq: # upper_triangle! -> NMatrix # upper_triangle!(k) -> NMatrix # triu! -> NMatrix # triu!(k) -> NMatrix # # Deletes the lower triangular portion of the matrix (in-place) so only the upper portion remains. # # * *Arguments* : # - +k+ -> Integer. How many extra diagonals to include in the deletion. # def upper_triangle!(k = 0) raise(NotImplementedError, "only implemented for 2D matrices") if self.shape.size > 2 (0...self.shape[0]).each do |i| if i - k >= 0 self[i, 0...(i-k)] = 0 end end self end alias :triu! :upper_triangle! # # call-seq: # lower_triangle -> NMatrix # lower_triangle(k) -> NMatrix # tril -> NMatrix # tril(k) -> NMatrix # # Returns the lower triangular portion of a matrix. This is analogous to the +tril+ method # in MATLAB. # # * *Arguments* : # - +k+ -> Integer. How many extra diagonals to include in the lower triangular portion. # def lower_triangle(k = 0) raise(NotImplementedError, "only implemented for 2D matrices") if self.shape.size > 2 t = self.clone_structure (0...self.shape[0]).each do |i| if i + k >= shape[0] t[i, :*] = self[i, :*] else t[i, (i+k+1)...self.shape[1]] = 0 t[i, 0..(i+k)] = self[i, 0..(i+k)] end end t end alias :tril :lower_triangle # # call-seq: # lower_triangle! 
#     lower_triangle! -> NMatrix
#     lower_triangle!(k) -> NMatrix
#     tril! -> NMatrix
#     tril!(k) -> NMatrix
#
# Deletes the upper triangular portion of the matrix (in-place) so only the lower portion remains.
#
# * *Arguments* :
#   - +k+ -> Integer. How many extra diagonals to include in the deletion.
#
def lower_triangle!(k = 0)
  raise(NotImplementedError, "only implemented for 2D matrices") if self.shape.size > 2

  (0...self.shape[0]).each do |i|
    # Zero out everything right of the k-th superdiagonal in row i.
    if i + k < shape[0]
      self[i, (i+k+1)...self.shape[1]] = 0
    end
  end
  self
end
alias :tril! :lower_triangle!

#
# call-seq:
#     layer(layer_number) -> NMatrix
#     row(layer_number, get_by) -> NMatrix
#
# * *Arguments* :
#   - +layer_number+ -> Integer.
#   - +get_by+ -> Type of slicing to use, +:copy+ or +:reference+.
# * *Returns* :
#   - A NMatrix representing the requested layer as a layer vector.
#
def layer(layer_number, get_by = :copy)
  layer = rank(2, layer_number, get_by)

  if jruby?
    nmatrix = NMatrix.new :copy
    nmatrix.shape = layer.shape
    nmatrix.s = layer.s
    return nmatrix
  else
    layer
  end
end

#
# call-seq:
#     shuffle! -> ...
#     shuffle!(random: rng) -> ...
#
# Re-arranges the contents of an NVector.
#
# TODO: Write more efficient version for Yale, list.
# TODO: Generalize for more dimensions.
def shuffle!(*args)
  # The +return+ matters here: if a subclass overrides method_missing with a
  # non-raising implementation, we must not fall through and shuffle anyway.
  return method_missing(:shuffle!, *args) if self.effective_dim > 1
  ary = self.to_flat_a
  ary.shuffle!(*args)
  ary.each.with_index { |v,idx| self[idx] = v }
  self
end

#
# call-seq:
#     shuffle -> ...
#     shuffle(rng) -> ...
#
# Re-arranges the contents of an NVector.
#
# TODO: Write more efficient version for Yale, list.
# TODO: Generalize for more dimensions.
def shuffle(*args)
  # Report the correct method name (:shuffle, not :shuffle!), and return
  # rather than falling through.
  return method_missing(:shuffle, *args) if self.effective_dim > 1
  t = self.clone
  t.shuffle!(*args)
end

#
# call-seq:
#     sorted_indices -> Array
#
# Returns an array of the indices ordered by value sorted.
#
def sorted_indices
  return method_missing(:sorted_indices) unless vector?
  ary = self.to_flat_array
  ary.each_index.sort_by { |i| ary[i] }  # from: http://stackoverflow.com/a/17841159/170300
end

#
# call-seq:
#     binned_sorted_indices -> Array
#
# Returns an array of arrays of indices ordered by value sorted. Functions basically like +sorted_indices+, but
# groups indices together for those values that are the same.
#
def binned_sorted_indices
  # Report the correct method name when this isn't a vector
  # (was :sorted_indices, which produced a misleading NoMethodError).
  return method_missing(:binned_sorted_indices) unless vector?
  ary = self.to_flat_array
  ary2 = []
  last_bin = ary.each_index.sort_by { |i| [ary[i]] }.inject([]) do |result, element|
    if result.empty? || ary[result[-1]] == ary[element]
      result << element
    else
      ary2 << result
      [element]
    end
  end
  ary2 << last_bin unless last_bin.empty?
  ary2
end

def method_missing name, *args, &block #:nodoc:
  if name.to_s =~ /^__list_elementwise_.*__$/
    raise NotImplementedError, "requested undefined list matrix element-wise operation"
  elsif name.to_s =~ /^__yale_scalar_.*__$/
    raise NotImplementedError, "requested undefined yale scalar element-wise operation"
  else
    super(name, *args, &block)
  end
end

def respond_to?(method, include_all = false) #:nodoc:
  if [:shuffle, :shuffle!, :each_with_index, :sorted_indices, :binned_sorted_indices, :nrm2, :asum].include?(method.intern) # vector-only methods
    return vector?
  elsif [:each_layer, :layer].include?(method.intern) # 3-or-more dimensions only
    return dim > 2
  else
    super
  end
end

#
# call-seq:
#     inject -> symbol
#
# This overrides the inject function to use map_stored for yale matrices
#
def inject(sym)
  return super(sym) unless self.yale?
  return self.map_stored.inject(sym)
end

# Returns the index of the first occurrence of the specified value. Returns
# an array containing the position of the value, nil in case the value is not found.
# def index(value) index = nil self.each_with_indices do |yields| if yields.first == value yields.shift index = yields break end end index end # # call-seq: # clone_structure -> NMatrix # # This function is like clone, but it only copies the structure and the default value. # None of the other values are copied. It takes an optional capacity argument. This is # mostly only useful for dense, where you may not want to initialize; for other types, # you should probably use +zeros_like+. # def clone_structure(capacity = nil) opts = {stype: self.stype, default: self.default_value, dtype: self.dtype} opts = {capacity: capacity}.merge(opts) if self.yale? NMatrix.new(self.shape, opts) end # # call-seq: # repeat(count, axis) -> NMatrix # # * *Arguments* : # - +count+ -> how many times NMatrix should be repeated # - +axis+ -> index of axis along which NMatrix should be repeated # * *Returns* : # - NMatrix created by repeating the existing one along an axis # * *Examples* : # m = NMatrix.new([2, 2], [1, 2, 3, 4]) # m.repeat(2, 0).to_a #<= [[1, 2], [3, 4], [1, 2], [3, 4]] # m.repeat(2, 1).to_a #<= [[1, 2, 1, 2], [3, 4, 3, 4]] def repeat(count, axis) raise(ArgumentError, 'Matrix should be repeated at least 2 times.') if count < 2 new_shape = shape new_shape[axis] *= count new_matrix = NMatrix.new(new_shape, dtype: dtype) slice = new_shape.map { |axis_size| 0...axis_size } start = 0 count.times do slice[axis] = start...(start += shape[axis]) new_matrix[*slice] = self end new_matrix end # This is how you write an individual element-wise operation function: #def __list_elementwise_add__ rhs # self.__list_map_merged_stored__(rhs){ |l,r| l+r }.cast(self.stype, NMatrix.upcast(self.dtype, rhs.dtype)) #end protected def inspect_helper #:nodoc: ary = [] ary << "shape:[#{shape.join(',')}]" << "dtype:#{dtype}" << "stype:#{stype}" if stype == :yale ary << "capacity:#{capacity}" # These are enabled by the DEBUG_YALE compiler flag in extconf.rb. 
if respond_to?(:__yale_a__) ary << "ija:#{__yale_ary__to_s(:ija)}" << "ia:#{__yale_ary__to_s(:ia)}" << "ja:#{__yale_ary__to_s(:ja)}" << "a:#{__yale_ary__to_s(:a)}" << "d:#{__yale_ary__to_s(:d)}" << "lu:#{__yale_ary__to_s(:lu)}" << "yale_size:#{__yale_size__}" end end ary end # Clone the structure as needed for a reshape def reshape_clone_structure(new_shape) #:nodoc: raise(ArgumentError, "reshape cannot resize; size of new and old matrices must match") unless self.size == new_shape.inject(1) { |p,i| p *= i } opts = {stype: self.stype, default: self.default_value, dtype: self.dtype} if self.yale? # We can generally predict the change in capacity for Yale. opts = {capacity: self.capacity - self.shape[0] + new_shape[0]}.merge(opts) end NMatrix.new(new_shape, opts) end # Helper for converting a matrix into an array of arrays recursively def to_a_rec(dimen = 0) #:nodoc: return self.flat_map { |v| v } if dimen == self.dim-1 ary = [] self.each_rank(dimen) do |sect| ary << sect.to_a_rec(dimen+1) end ary end # NMatrix constructor helper for sparse matrices. Uses multi-slice-setting to initialize a matrix # with a given array of initial values. def __sparse_initial_set__(ary) #:nodoc: self[0...self.shape[0],0...self.shape[1]] = ary end # This function assumes that the shapes of the two matrices have already # been tested and are the same. # # Called from inside NMatrix: nm_eqeq # # There are probably more efficient ways to do this, but currently it's unclear how. # We could use +each_row+, but for list matrices, it's still going to need to make a # reference to each of those rows, and that is going to require a seek. # # It might be more efficient to convert one sparse matrix type to the other with a # cast and then run the comparison. For now, let's assume that people aren't going # to be doing this very often, and we can optimize as needed. def dense_eql_sparse? 
m #:nodoc: m.each_with_indices do |v,*indices| return false if self[*indices] != v end return true end alias :sparse_eql_sparse? :dense_eql_sparse? # # See the note in #cast about why this is necessary. # If this is a non-dense matrix with a complex dtype and to_dtype is # non-complex, then this will convert the default value to noncomplex. # Returns 0 if dense. Returns existing default_value if there isn't a # mismatch. # def maybe_get_noncomplex_default_value(to_dtype) #:nodoc: default_value = 0 unless self.stype == :dense then if self.dtype.to_s.start_with?('complex') and not to_dtype.to_s.start_with?('complex') then default_value = self.default_value.real else default_value = self.default_value end end default_value end end require_relative './shortcuts.rb' require_relative './enumerate.rb' require_relative './version.rb' require_relative './blas.rb' ================================================ FILE: lib/nmatrix/rspec.rb ================================================ # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == rspec.rb # # Monkey patches for RSpec improving its ability to work well with # NMatrix (particularly #be_within). # require 'rspec' # Amend RSpec to allow #be_within for matrices. 
module RSpec::Matchers::BuiltIn class BeWithin def of(expected) @expected = expected @unit = '' if expected.is_a?(NMatrix) @tolerance = if @delta.is_a?(NMatrix) @delta.abs elsif @delta.is_a?(Array) NMatrix.new(:dense, expected.shape, @delta, :object).abs.cast(:dtype => expected.abs_dtype) else (NMatrix.ones_like(expected) * @delta).abs end else @tolerance = @delta end self end def percent_of(expected) @expected = expected @unit = '%' @tolerance = @expected.abs * @delta / 100.0 # <- only change is to reverse abs and @delta self end def matches?(actual) @actual = actual raise needs_expected unless defined? @expected raise needs_subtractable unless @actual.respond_to? :- res = (@actual - @expected).abs <= @tolerance #if res.is_a?(NMatrix) # require 'pry' # binding.pry #end res.is_a?(NMatrix) ? !res.any? { |x| !x } : res end end end ================================================ FILE: lib/nmatrix/shortcuts.rb ================================================ #-- # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == shortcuts.rb # # These are shortcuts for NMatrix and NVector creation, contributed by Daniel # Carrera (dcarrera@hush.com) and Carlos Agarie (carlos.agarie@gmail.com). # # TODO Make all the shortcuts available through modules, allowing someone # to include them to make "MATLAB-like" scripts. # # There are some questions to be answered before this can be done, tho. 
#++ class NMatrix # Methods for generating magic matrix. module MagicHelpers class << self def odd_magic(nm, shape) row = shape - 1 col = shape / 2 nm[row,col] = 1 (2..shape * shape).each do |index| if nm[(row + 1) % shape,(col + 1) % shape] == 0 row = (row + 1) % shape col = (col + 1) % shape else row = (row - 1 + shape) % shape end nm[row,col] = index end end def doubly_even_magic(nm, shape) mini_square_num = shape / 4 count = 1 inv_count = shape * shape shape.times do |row| shape.times do |col| if col >= mini_square_num and col < shape - mini_square_num if row >= mini_square_num and row < shape - mini_square_num nm[row,col] = count else nm[row,col] = inv_count end elsif row < mini_square_num or row >= shape - mini_square_num nm[row,col] = count else nm[row,col] = inv_count end count += 1 inv_count -= 1 end end end def singly_even_magic(nm, shape) half_shape = shape / 2 complementary_pair = (shape - 2) / 4 swap_col = NMatrix.new([shape]) index = 0 mini_magic = NMatrix.new([half_shape,half_shape], 0, dtype: nm.dtype) odd_magic mini_magic, half_shape half_shape.times do |row| half_shape.times do |col| nm[row,col] = mini_magic[row,col] nm[row + half_shape,col + half_shape] = mini_magic[row,col] + half_shape * half_shape nm[row,col + half_shape] = mini_magic[row,col] + 2 * half_shape * half_shape nm[row + half_shape,col] = mini_magic[row,col] + 3 * half_shape * half_shape end end (1..complementary_pair).each do |complementary_entry| swap_col[index] = complementary_entry index += 1 end (shape - complementary_pair + 2..shape).each do |center| swap_col[index] = center index += 1 end (1..half_shape).each do |row| (1..index).each do |col| temp = nm[row - 1,swap_col[col - 1] - 1] nm[row - 1,swap_col[col - 1] - 1] = nm[row + half_shape - 1,swap_col[col - 1] - 1] nm[row + half_shape - 1,swap_col[col - 1] - 1] = temp end end temp = nm[complementary_pair,0] nm[complementary_pair,0] = nm[complementary_pair + half_shape,0] nm[complementary_pair + half_shape,0] = temp temp = 
nm[complementary_pair + half_shape,complementary_pair] nm[complementary_pair + half_shape,complementary_pair] = nm[complementary_pair,complementary_pair] nm[complementary_pair,complementary_pair] = temp end end end # call-seq: # m.dense? -> true or false # # Determine if +m+ is a dense matrix. def dense?; return stype == :dense; end # call-seq: # m.yale? -> true or false # # Determine if +m+ is a Yale matrix. def yale?; return stype == :yale; end # call-seq: # m.list? -> true or false # # Determine if +m+ is a list-of-lists matrix. def list?; return stype == :list; end class << self # call-seq: # NMatrix[Numeric, ..., Numeric, dtype: Symbol] -> NMatrix # NMatrix[Array, dtype: Symbol] -> NMatrix # # The default value for +dtype+ is guessed from the first parameter. For example: # NMatrix[1.0, 2.0].dtype # => :float64 # # But this is just a *guess*. If the other values can't be converted to # this dtype, a +TypeError+ will be raised. # # You can use the +N+ constant in this way: # N = NMatrix # N[1, 2, 3] # # NMatrix needs to have a succinct way to create a matrix by specifying the # components directly. This is very useful for using it as an advanced # calculator, it is useful for learning how to use, for testing language # features and for developing algorithms. # # The NMatrix::[] method provides a way to create a matrix in a way that is compact and # natural. The components are specified using Ruby array syntax. Optionally, # one can specify a dtype as the last parameter (default is :float64). # # Examples: # # a = N[ 1,2,3,4 ] => 1 2 3 4 # # a = N[ 1,2,3,4, :int32 ] => 1 2 3 4 # # a = N[ [1,2,3], [3,4,5] ] => 1.0 2.0 3.0 # 3.0 4.0 5.0 # # a = N[ 3,6,9 ].transpose => 3 # 6 # 9 # # SYNTAX COMPARISON: # # MATLAB: a = [ [1 2 3] ; [4 5 6] ] or [ 1 2 3 ; 4 5 6 ] # IDL: a = [ [1,2,3] , [4,5,6] ] # NumPy: a = array( [1,2,3], [4,5,6] ) # # SciRuby: a = NMatrix[ [1,2,3], [4,5,6] ] # Ruby array: a = [ [1,2,3], [4,5,6] ] def [](*params) options = params.last.is_a?(Hash) ? 
params.pop : {} # First find the dimensions of the array. i = 0 shape = [] row = params while row.is_a?(Array) shape[i] = row.length row = row[0] i += 1 end # A row vector should be stored as 1xN, not N #shape.unshift(1) if shape.size == 1 # Then flatten the array. NMatrix.new(shape, params.flatten, options) end # # call-seq: # zeros(shape) -> NMatrix # zeros(shape, dtype: dtype) -> NMatrix # zeros(shape, dtype: dtype, stype: stype) -> NMatrix # # Creates a new matrix of zeros with the dimensions supplied as # parameters. # # * *Arguments* : # - +shape+ -> Array (or integer for square matrix) specifying the dimensions. # - +dtype+ -> (optional) Default is +:float64+ # - +stype+ -> (optional) Default is +:dense+. # * *Returns* : # - NMatrix filled with zeros. # # Examples: # # NMatrix.zeros(2) # => 0.0 0.0 # 0.0 0.0 # # NMatrix.zeros([2, 3], dtype: :int32) # => 0 0 0 # 0 0 0 # # NMatrix.zeros([1, 5], dtype: :int32) # => 0 0 0 0 0 # def zeros(shape, opts = {}) NMatrix.new(shape, 0, {:dtype => :float64}.merge(opts)) end alias :zeroes :zeros # # call-seq: # ones(shape) -> NMatrix # ones(shape, dtype: dtype, stype: stype) -> NMatrix # # Creates a matrix filled with ones. # # * *Arguments* : # - +shape+ -> Array (or integer for square matrix) specifying the shape. # - +opts+ -> (optional) Hash of options from NMatrix#initialize # * *Returns* : # - NMatrix filled with ones. # # Examples: # # NMatrix.ones([1, 3]) # => 1.0 1.0 1.0 # # NMatrix.ones([2, 3], dtype: :int32) # => 1 1 1 # 1 1 1 # def ones(shape, opts={}) NMatrix.new(shape, 1, {:dtype => :float64, :default => 1}.merge(opts)) end # call-seq: # ones_like(nm) -> NMatrix # # Creates a new matrix of ones with the same dtype and shape as the # provided matrix. # # @param [NMatrix] nm the nmatrix whose dtype and shape will be used # @return [NMatrix] a new nmatrix filled with ones. 
# def ones_like(nm) NMatrix.ones(nm.shape, dtype: nm.dtype, stype: nm.stype, capacity: nm.capacity, default: 1) end # call-seq: # zeros_like(nm) -> NMatrix # # Creates a new matrix of zeros with the same stype, dtype, and shape # as the provided matrix. # # @param [NMatrix] nm the nmatrix whose stype, dtype, and shape will be used # @return [NMatrix] a new nmatrix filled with zeros. # def zeros_like(nm) NMatrix.zeros(nm.shape, dtype: nm.dtype, stype: nm.stype, capacity: nm.capacity, default: 0) end # # call-seq: # eye(shape) -> NMatrix # eye(shape, dtype: dtype) -> NMatrix # eye(shape, stype: stype, dtype: dtype) -> NMatrix # # Creates an identity matrix (square matrix rank 2). # # * *Arguments* : # - +size+ -> Array (or integer for square matrix) specifying the dimensions. # - +dtype+ -> (optional) Default is +:float64+ # - +stype+ -> (optional) Default is +:dense+. # * *Returns* : # - An identity matrix. # # Examples: # # NMatrix.eye(3) # => 1.0 0.0 0.0 # 0.0 1.0 0.0 # 0.0 0.0 1.0 # # NMatrix.eye(3, dtype: :int32) # => 1 0 0 # 0 1 0 # 0 0 1 # # NMatrix.eye(2, dtype: :int32, stype: :yale) # => 1 0 # 0 1 # def eye(shape, opts={}) # Fill the diagonal with 1's. m = NMatrix.zeros(shape, {:dtype => :float64}.merge(opts)) (0...m.shape[0]).each do |i| m[i, i] = 1 end m end alias :identity :eye # # call-seq: # hilbert(shape) -> NMatrix # hilbert(shape, dtype: dtype) -> NMatrix # hilbert(shape, stype: stype, dtype: dtype) -> NMatrix # # Creates an hilbert matrix (square matrix). # # * *Arguments* : # - +size+ -> integer ( for square matrix) specifying the dimensions. # - +dtype+ -> (optional) Default is +:float64+ # - +stype+ -> (optional) Default is +:dense+. # * *Returns* : # - A hilbert matrix. 
# # Examples: # # NMatrix.hilbert(3) # => 1.0 0.5 0.3333333333333333 # 0.5 0.3333333333333333 0.25 # 0.3333333333333333 0.25 0.2 # def hilbert(shape, opts={}) m = NMatrix.new([shape,shape], {:dtype => :float64}.merge(opts)) 0.upto(shape - 1) do |i| 0.upto(i) do |j| m[i,j] = 1.0 / (j + i + 1) m[j,i] = m[i,j] if i != j end end m end # # call-seq: # inv_hilbert(shape) -> NMatrix # inv_hilbert(shape, dtype: dtype) -> NMatrix # inv_hilbert(shape, stype: stype, dtype: dtype) -> NMatrix # # Creates an inverse hilbert matrix (square matrix rank 2). # # * *Arguments* : # - +size+ -> Array (or integer for square matrix) specifying the dimensions. # - +dtype+ -> (optional) Default is +:float64+ # - +stype+ -> (optional) Default is +:dense+. # * *Returns* : # - A hilbert matrix. # # Examples: # NMatrix.inv_hilbert(3) # => 9.0, -36.0, 30.0 # -36.0, 192.0, -180.0 # 30.0, -180.0, 180.0 # # def inv_hilbert(shape, opts={}) opts = {:dtype => :float64}.merge(opts) m = NMatrix.new([shape,shape],opts) combination = NMatrix.new([2*shape,2*shape],opts) #combinations refers to the combination of n things taken k at a time 0.upto(2*shape-1) do |i| 0.upto(i) do |j| if j != 0 and j != i combination[i,j] = combination[i-1,j] + combination[i-1,j-1] else combination[i,j] = 1 end end end 0.upto(shape-1) do |i| 0.upto(i) do |j| m[i,j] = combination[shape + j,shape - i - 1] * ((i+j)+1) * \ combination[shape + i,shape - j - 1] * (-1) ** ((i+j)) * \ combination[(i+j),i] * combination[(i+j),i] m[j,i] = m[i,j] if i != j end end m end # # call-seq: # diagonals(array) -> NMatrix # diagonals(array, dtype: dtype, stype: stype) -> NMatrix # # Creates a matrix filled with specified diagonals. # # * *Arguments* : # - +entries+ -> Array containing input values for diagonal matrix # - +options+ -> (optional) Hash with options for NMatrix#initialize # * *Returns* : # - NMatrix filled with specified diagonal values. 
# # Examples: # # NMatrix.diagonal([1.0,2,3,4]) # => 1.0 0.0 0.0 0.0 # 0.0 2.0 0.0 0.0 # 0.0 0.0 3.0 0.0 # 0.0 0.0 0.0 4.0 # # NMatrix.diagonal([1,2,3,4], dtype: :int32) # => 1 0 0 0 # 0 2 0 0 # 0 0 3 0 # 0 0 0 4 # # def diagonal(entries, opts={}) m = NMatrix.zeros(entries.size, {:dtype => guess_dtype(entries[0]), :capacity => entries.size + 1}.merge(opts) ) entries.each_with_index do |n, i| m[i,i] = n end m end alias :diag :diagonal alias :diagonals :diagonal # Generate a block-diagonal NMatrix from the supplied 2D square matrices. # # * *Arguments* # - +*params+ -> An array that collects all arguments passed to the method. The method # can receive any number of arguments. Optionally, the last entry of +params+ is # a hash of options from NMatrix#initialize. All other entries of +params+ are # the blocks of the desired block-diagonal matrix. Each such matrix block can be # supplied as a square 2D NMatrix object, or alternatively as an array of arrays # (with dimensions corresponding to a square matrix), or alternatively as a number. # * *Returns* # - NMatrix of block-diagonal form filled with specified matrices # as the blocks along the diagonal. # # * *Example* # # a = NMatrix.new([2,2], [1,2,3,4]) # b = NMatrix.new([1,1], [123], dtype: :float64) # c = Array.new(2) { [[10,10], [10,10]] } # d = Array[[1,2,3], [4,5,6], [7,8,9]] # m = NMatrix.block_diagonal(a, b, *c, d, 10.0, 11, dtype: :int64, stype: :yale) # => # [ # [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] # [3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] # [0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0] # [0, 0, 0, 10, 10, 0, 0, 0, 0, 0, 0, 0] # [0, 0, 0, 10, 10, 0, 0, 0, 0, 0, 0, 0] # [0, 0, 0, 0, 0, 10, 10, 0, 0, 0, 0, 0] # [0, 0, 0, 0, 0, 10, 10, 0, 0, 0, 0, 0] # [0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, 0] # [0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 0, 0] # [0, 0, 0, 0, 0, 0, 0, 7, 8, 9, 0, 0] # [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0] # [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11] # ] # def block_diagonal(*params) options = params.last.is_a?(Hash) ? 
params.pop : {} params.each_index do |i| params[i] = params[i].to_nm if params[i].is_a?(Array) # Convert Array to NMatrix params[i] = NMatrix.new([1,1], [params[i]]) if params[i].is_a?(Numeric) # Convert number to NMatrix end block_sizes = [] #holds the size of each matrix block params.each do |b| unless b.is_a?(NMatrix) raise(ArgumentError, "Only NMatrix or appropriate Array objects or single numbers allowed") end raise(ArgumentError, "Only 2D matrices or 2D arrays allowed") unless b.shape.size == 2 raise(ArgumentError, "Only square-shaped blocks allowed") unless b.shape[0] == b.shape[1] block_sizes << b.shape[0] end block_diag_mat = NMatrix.zeros(block_sizes.inject(0,:+), options) (0...params.length).each do |n| # First determine the size and position of the n'th block in the block-diagonal matrix block_size = block_sizes[n] block_pos = block_sizes[0...n].inject(0,:+) # populate the n'th block in the block-diagonal matrix (0...block_size).each do |i| (0...block_size).each do |j| block_diag_mat[block_pos+i,block_pos+j] = params[n][i,j] end end end return block_diag_mat end alias :block_diag :block_diagonal # # call-seq: # random(shape) -> NMatrix # # Creates a +:dense+ NMatrix with random numbers between 0 and 1 generated # by +Random::rand+. The parameter is the dimension of the matrix. # # If you use an integer dtype, make sure to specify :scale as a parameter, or you'll # only get a matrix of 0s. # # * *Arguments* : # - +shape+ -> Array (or integer for square matrix) specifying the dimensions. # * *Returns* : # - NMatrix filled with random values. # # Examples: # # NMatrix.random([2, 2]) # => 0.4859439730644226 0.1783195585012436 # 0.23193766176700592 0.4503345191478729 # # NMatrix.random([2, 2], :dtype => :byte, :scale => 255) # => [ [252, 108] [44, 12] ] # def random(shape, opts={}) scale = opts.delete(:scale) || 1.0 if opts[:seed].nil? 
rng = Random.new else rng = Random.new(opts[:seed]) end random_values = [] # Construct the values of the final matrix based on the dimension. if opts[:dtype] == :complex64 || opts[:dtype] == :complex128 NMatrix.size(shape).times { |i| random_values << Complex(rng.rand(scale), rng.rand(scale)) } else NMatrix.size(shape).times { |i| random_values << rng.rand(scale) } end NMatrix.new(shape, random_values, {:dtype => :float64, :stype => :dense}.merge(opts)) end alias :rand :random # # call-seq: # magic(shape) -> NMatrix # magic(shape, dtype: dtype) -> NMatrix # # The parameter is the dimension of the matrix. # # Creates a +:dense+ NMatrix with the following properties: # - An arrangement of the numbers from 1 to n^2 (n-squared) in the matrix, with each number occurring exactly once. # - The sum of the entries of any row, any column, or any main diagonal is the same. # - This sum must be n(n^2+1)/2. # # See: http://www.mathworks.com/help/matlab/ref/magic.html # # * *Arguments* : # - +shape+ -> Array (or integer for square matrix) specifying the dimensions. # - +dtype+ -> (optional) Default is +:float64+ # * *Returns* : # - NMatrix with the above given properties. 
# # Examples: # # NMatrix.magic(3) # => [ [4.0, 9.0, 2.0] [3.0, 5.0, 7.0] [8.0, 1.0, 6.0] ] # # NMatrix.magic(4, dtype :int32) # => [ [ 1, 15, 14, 4] # [12, 6, 7, 9] # [ 8, 10, 11, 5] # [13, 3, 2, 16] ] # # NMatrix.magic(6,dtype: :int64) # => [ [31, 9, 2, 22, 27, 20] # [ 3, 32, 7, 21, 23, 25] # [35, 1, 6, 26, 19, 24] # [ 4, 36, 29, 13, 18, 11] # [30, 5, 34, 12, 14, 16] # [ 8, 28, 33, 17, 10, 15] ] # def magic(shape, opts={}) raise(ArgumentError, "shape of two is not allowed") if shape == 2 nm = NMatrix.new([shape,shape], 0, {:dtype => :float64}.merge(opts)) if shape % 2 != 0 MagicHelpers.odd_magic nm, shape elsif shape % 4 == 0 MagicHelpers.doubly_even_magic nm, shape else MagicHelpers.singly_even_magic nm, shape end nm end # # call-seq: # linspace(base, limit) -> 1x100 NMatrix # linspace(base, limit, *shape) -> NMatrix # # Returns an NMatrix with +[shape[0] x shape[1] x .. x shape[dim-1]]+ values of dtype +:float64+ equally spaced from # +base+ to +limit+, inclusive. # # See: http://www.mathworks.com/help/matlab/ref/linspace.html # # * *Arguments* : # - +base+ -> The first value in the sequence. # - +limit+ -> The last value in the sequence. # - +shape+ -> Desired output shape. Default returns a 1x100 row vector. # * *Returns* : # - NMatrix with +:float64+ values. # # Examples :- # # NMatrix.linspace(1,Math::PI, 6) # =>[1.0, # 1.4283185005187988, # 1.8566370010375977, # 2.2849555015563965, # 2.7132740020751953, # 3.1415927410125732 # ] # # NMatrix.linspace(1,10, [3,2]) # =>[ # [ 1.0, 2.799999952316284] # [4.599999904632568, 6.400000095367432] # [8.199999809265137, 10.0] # ] # def linspace(base, limit, shape = [100]) # Convert shape to array format shape = [shape] if shape.is_a? 
Integer #Calculate number of elements count = shape.inject(:*) # Linear spacing between elements calculated in step # step = limit - base / (count - 1) # [Result Sequence] = [0->N sequence] * step + [Base] step = (limit - base) * (1.0 / (count - 1)) result = NMatrix.seq(shape, {:dtype => :float64}) * step result += NMatrix.new(shape, base) result end # call-seq: # logspace(base, limit) -> 1x50 NMatrix with exponent_base = 10 # logspace(base, limit, shape , exponent_base:) -> NMatrix # logspace(base, :pi, n) -> 1xn NMatrix with interval [10 ^ base, Math::PI] # # Returns an NMatrix with +[shape[0] x shape[1] x .. x shape[dim-1]]+ values of dtype +:float64+ logarithmically spaced from # +exponent_base ^ base+ to +exponent_base ^ limit+, inclusive. # # See: http://www.mathworks.com/help/matlab/ref/logspace.html # # * *Arguments* : # - +base+ -> exponent_base ** base is the first value in the sequence # - +limit+ -> exponent_base ** limit is the last value in the sequence. # - +shape+ -> Desired output shape. Default returns a 1x50 row vector. # * *Returns* : # - NMatrix with +:float64+ values. # # Examples :- # # NMatrix.logspace(1,:pi,7) # =>[ # 10.0000, # 8.2450, # 6.7980, # 5.6050, # 4.6213, # 3.8103, # 3.1416 # ] # # NMatrix.logspace(1,2,[3,2]) # =>[ # [10.0, 15.8489] # [25.1189, 39.8107] # [63.0957, 100.0] # ] # def logspace(base, limit, shape = [50], exponent_base: 10) #Calculate limit for [10 ^ base ... Math::PI] if limit = :pi limit = Math.log(Math::PI, exponent_base = 10) if limit == :pi shape = [shape] if shape.is_a? Integer #[base...limit] -> [exponent_base ** base ... exponent_base ** limit] result = NMatrix.linspace(base, limit, shape) result.map {|element| exponent_base ** element} end # # call-seq: # linspace(base, limit) -> 1x100 NMatrix # linspace(base, limit, *shape) -> NMatrix # # Returns an NMatrix with +[shape[0] x shape[1] x .. x shape[dim-1]]+ values of dtype +:float64+ equally spaced from # +base+ to +limit+, inclusive. 
# # See: http://www.mathworks.com/help/matlab/ref/linspace.html # # * *Arguments* : # - +base+ -> The first value in the sequence. # - +limit+ -> The last value in the sequence. # - +shape+ -> Desired output shape. Default returns a 1x100 row vector. # * *Returns* : # - NMatrix with +:float64+ values. # # Examples :- # # NMatrix.linspace(1,Math::PI, 6) # =>[1.0, # 1.4283185005187988, # 1.8566370010375977, # 2.2849555015563965, # 2.7132740020751953, # 3.1415927410125732 # ] # # NMatrix.linspace(1,10, [3,2]) # =>[ # [ 1.0, 2.799999952316284] # [4.599999904632568, 6.400000095367432] # [8.199999809265137, 10.0] # ] # def linspace(base, limit, shape = [100]) # Convert shape to array format shape = [shape] if shape.is_a? Integer #Calculate number of elements count = shape.inject(:*) # Linear spacing between elements calculated in step # step = limit - base / (count - 1) # [Result Sequence] = [0->N sequence] * step + [Base] step = (limit - base) * (1.0 / (count - 1)) result = NMatrix.seq(shape, {:dtype => :float64}) * step result += NMatrix.new(shape, base) result end # call-seq: # logspace(base, limit) -> 1x50 NMatrix with exponent_base = 10 # logspace(base, limit, shape , exponent_base:) -> NMatrix # logspace(base, :pi, n) -> 1xn NMatrix with interval [10 ^ base, Math::PI] # # Returns an NMatrix with +[shape[0] x shape[1] x .. x shape[dim-1]]+ values of dtype +:float64+ logarithmically spaced from # +exponent_base ^ base+ to +exponent_base ^ limit+, inclusive. # # See: http://www.mathworks.com/help/matlab/ref/logspace.html # # * *Arguments* : # - +base+ -> exponent_base ** base is the first value in the sequence # - +limit+ -> exponent_base ** limit is the last value in the sequence. # - +shape+ -> Desired output shape. Default returns a 1x50 row vector. # * *Returns* : # - NMatrix with +:float64+ values. 
# # Examples :- # # NMatrix.logspace(1,:pi,7) # =>[ # 10.0000, # 8.2450, # 6.7980, # 5.6050, # 4.6213, # 3.8103, # 3.1416 # ] # # NMatrix.logspace(1,2,[3,2]) # =>[ # [10.0, 15.8489] # [25.1189, 39.8107] # [63.0957, 100.0] # ] # def logspace(base, limit, shape = [50], exponent_base: 10) #Calculate limit for [10 ^ base ... Math::PI] if limit = :pi limit = Math.log(Math::PI, exponent_base = 10) if limit == :pi shape = [shape] if shape.is_a? Integer #[base...limit] -> [exponent_base ** base ... exponent_base ** limit] result = NMatrix.linspace(base, limit, shape) result.map {|element| exponent_base ** element} end # # call-seq: # seq(shape) -> NMatrix # seq(shape, options) -> NMatrix # bindgen(shape) -> NMatrix of :byte # indgen(shape) -> NMatrix of :int64 # findgen(shape) -> NMatrix of :float32 # dindgen(shape) -> NMatrix of :float64 # cindgen(shape) -> NMatrix of :complex64 # zindgen(shape) -> NMatrix of :complex128 # rbindgen(shape) -> NMatrix of :object # # Creates a matrix filled with a sequence of integers starting at zero. # # * *Arguments* : # - +shape+ -> Array (or integer for square matrix) specifying the dimensions. # - +options+ -> (optional) Options permissible for NMatrix#initialize # * *Returns* : # - NMatrix filled with values 0 through +size+. # # Examples: # # NMatrix.seq(2) # => 0 1 # 2 3 # # NMatrix.seq([3, 3], dtype: :float32) # => 0.0 1.0 2.0 # 3.0 4.0 5.0 # 6.0 7.0 8.0 # def seq(shape, options={}) # Construct the values of the final matrix based on the dimension. values = (0 ... NMatrix.size(shape)).to_a # It'll produce :int32, except if a dtype is provided. 
NMatrix.new(shape, values, {:stype => :dense}.merge(options)) end {:bindgen => :byte, :indgen => :int64, :findgen => :float32, :dindgen => :float64, :cindgen => :complex64, :zindgen => :complex128, :rbindgen => :object}.each_pair do |meth, dtype| define_method(meth) { |shape| NMatrix.seq(shape, :dtype => dtype) } end end end module NVector #:nodoc: class << self # # call-seq: # new(shape) -> NVector # new(stype, shape) -> NVector # new(shape, init) -> NVector # new(:dense, shape, init) -> NVector # new(:list, shape, init) -> NVector # new(shape, init, dtype) -> NVector # new(stype, shape, init, dtype) -> NVector # new(stype, shape, dtype) -> NVector # # Creates a new NVector. See also NMatrix#initialize for a more detailed explanation of # the arguments. # # * *Arguments* : # - +stype+ -> (optional) Storage type of the vector (:list, :dense, :yale). Defaults to :dense. # - +shape+ -> Shape of the vector. Accepts [n,1], [1,n], or n, where n is a Fixnum. # - +init+ -> (optional) Yale: capacity; List: default value (0); Dense: initial value or values (uninitialized by default). # - +dtype+ -> (optional if +init+ provided) Data type stored in the vector. For :dense and :list, can be inferred from +init+. # * *Returns* : # - # def new(*args) stype = args[0].is_a?(Symbol) ? args.shift : :dense shape = args[0].is_a?(Array) ? args.shift : [1,args.shift] if shape.size != 2 || !shape.include?(1) || shape == [1,1] raise(ArgumentError, "shape must be a Fixnum or an Array of positive Fixnums where exactly one value is 1") end warn "NVector is deprecated and not guaranteed to work any longer" NMatrix.new(stype, shape, *args) end # # call-seq: # zeros(size) -> NMatrix # zeros(size, dtype) -> NMatrix # # Creates a new vector of zeros with the dimensions supplied as # parameters. # # * *Arguments* : # - +size+ -> Array (or integer for square matrix) specifying the dimensions. # - +dtype+ -> (optional) Default is +:float64+. # * *Returns* : # - NVector filled with zeros. 
# # Examples: # # NVector.zeros(2) # => 0.0 # 0.0 # # NVector.zeros(3, :int32) # => 0 # 0 # 0 # def zeros(size, dtype = :float64) NMatrix.new([size,1], 0, dtype: dtype) end alias :zeroes :zeros # # call-seq: # ones(size) -> NVector # ones(size, dtype) -> NVector # # Creates a vector of ones with the dimensions supplied as # parameters. # # * *Arguments* : # - +size+ -> Array (or integer for square matrix) specifying the dimensions. # - +dtype+ -> (optional) Default is +:float64+. # * *Returns* : # - NVector filled with ones. # # Examples: # # NVector.ones(2) # => 1.0 # 1.0 # # NVector.ones(3, :int32) # => 1 # 1 # 1 # def ones(size, dtype = :float64) NMatrix.new([size,1], 1, dtype: dtype) end # # call-seq: # random(size) -> NVector # # Creates a vector with random numbers between 0 and 1 generated by # +Random::rand+ with the dimensions supplied as parameters. # # * *Arguments* : # - +size+ -> Array (or integer for square matrix) specifying the dimensions. # - +opts+ -> (optional) NMatrix#initialize options # * *Returns* : # - NVector filled with random numbers generated by the +Random+ class. # # Examples: # # NVector.rand(2) # => 0.4859439730644226 # 0.1783195585012436 # def random(size, opts = {}) rng = Random.new random_values = [] size.times { |i| random_values << rng.rand } NMatrix.new([size,1], random_values, opts) end # # call-seq: # seq(n) -> NVector # seq(n, dtype) -> NVector # # Creates a vector with a sequence of +n+ integers starting at zero. You # can choose other types based on the dtype parameter. # # * *Arguments* : # - +n+ -> Number of integers in the sequence. # - +dtype+ -> (optional) Default is +:int64+. # * *Returns* : # - NVector filled with +n+ integers. # # Examples: # # NVector.seq(2) # => 0 # 1 # # NVector.seq(3, :float32) # => 0.0 # 1.0 # 2.0 # def seq(size, dtype = :int64) values = (0 ... size).to_a NMatrix.new([size,1], values, dtype: dtype) end # # call-seq: # indgen(n) -> NVector # # Returns an integer NVector. 
Equivalent to seq(n, :int32). # # * *Arguments* : # - +n+ -> Size of the sequence. # * *Returns* : # - NVector filled with +n+ integers of dtype +:int32+. # def indgen(n) NVector.seq(n, :int32) end # # call-seq: # findgen(n) -> NVector # # Returns a float NVector. Equivalent to seq(n, :float32). # # * *Arguments* : # - +n+ -> Size of the sequence. # * *Returns* : # - NVector filled with +n+ integers of dtype +:float32+. # def findgen(n) NVector.seq(n, :float32) end # # call-seq: # bindgen(n) -> NVector # # Returns a byte NVector. Equivalent to seq(n, :byte). # # * *Arguments* : # - +n+ -> Size of the sequence. # * *Returns* : # - NVector filled with +n+ integers of dtype +:byte+. # def bindgen(n) NVector.seq(n, :byte) end # # call-seq: # cindgen(n) -> NVector # # Returns a complex NVector. Equivalent to seq(n, :complex64). # # * *Arguments* : # - +n+ -> Size of the sequence. # * *Returns* : # - NVector filled with +n+ integers of dtype +:complex64+. # def cindgen(n) NVector.seq(n, :complex64) end # # call-seq: # linspace(a, b) -> NVector # linspace(a, b, n) -> NVector # # Returns a NVector with +n+ values of dtype +:float64+ equally spaced from # +a+ to +b+, inclusive. # # See: http://www.mathworks.com/help/matlab/ref/linspace.html # # * *Arguments* : # - +a+ -> The first value in the sequence. # - +b+ -> The last value in the sequence. # - +n+ -> The number of elements. Default is 100. # * *Returns* : # - NVector with +n+ +:float64+ values. # # Example: # x = NVector.linspace(0, Math::PI, 1000) # x.pretty_print # [0.0 # 0.0031447373909807737 # 0.006289474781961547 # ... # 3.135303178807831 # 3.138447916198812 # 3.141592653589793] # => nil # def linspace(a, b, n = 100) # Formula: seq(n) * step + a # step = ((b - a) / (n - 1)) step = (b - a) * (1.0 / (n - 1)) # dtype = :float64 is used to prevent integer coercion. 
result = NVector.seq(n, :float64) * NMatrix.new([n,1], step, dtype: :float64) result += NMatrix.new([n,1], a, dtype: :float64) result end # # call-seq: # logspace(a, b) -> NVector # logspace(a, b, n) -> NVector # # Returns a NVector with +n+ values of dtype +:float64+ logarithmically # spaced from +10^a+ to +10^b+, inclusive. # # See: http://www.mathworks.com/help/matlab/ref/logspace.html # # * *Arguments* : # - +a+ -> The first value in the sequence. # - +b+ -> The last value in the sequence. # - +n+ -> The number of elements. Default is 100. # * *Returns* : # - NVector with +n+ +:float64+ values. # # Example: # x = NVector.logspace(0, Math::PI, 10) # x.pretty_print # [1.0 # 2.2339109164570266 # 4.990357982665873 # 11.148015174505757 # 24.903672795156997 # 55.632586516975095 # 124.27824233101062 # 277.6265222213364 # 620.1929186882427 # 1385.4557313670107] # => nil # def logspace(a, b, n = 100) # Formula: 10^a, 10^(a + step), ..., 10^b, where step = ((b-a) / (n-1)). result = NVector.linspace(a, b, n) result.each_stored_with_index { |element, i| result[i] = 10 ** element } result end end end # This constant is intended as a simple constructor for NMatrix meant for # experimenting. # # Examples: # # a = N[ 1,2,3,4 ] => 1 2 3 4 # # a = N[ 1,2,3,4, :int32 ] => 1 2 3 4 # # a = N[ [1,2,3], [3,4,5] ] => 1 2 3 # 3 4 5 # # a = N[ 3,6,9 ].transpose => 3 # 6 # 9 N = NMatrix ================================================ FILE: lib/nmatrix/version.rb ================================================ #-- # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2016, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2016, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. 
# # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement #++ class NMatrix # Note that the format of the VERSION string is needed for NMatrix # native IO. If you change the format, please make sure that native # IO can still understand NMatrix::VERSION. module VERSION #:nodoc: MAJOR = 0 MINOR = 2 TINY = 4 #PRE = "a" STRING = [MAJOR, MINOR, TINY].compact.join(".") #STRING = [MAJOR, MINOR, TINY, PRE].compact.join(".") end end ================================================ FILE: lib/nmatrix/yale_functions.rb ================================================ #-- # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == yale_functions.rb # # This file contains some shortcut functions for the specialty # Yale matrix extensions (mostly for debugging and experimental # purposes, but sometimes applicable when you need to speed up # your code a lot). #++ module NMatrix::YaleFunctions # call-seq: # yale_nd_row_size(i) -> Fixnum # # Returns the size of a given non-diagonal row. def yale_nd_row_size i yale_ija(i+1) - yale_ija(i) end # call-seq: # yale_ja_at(i) -> Array # # Returns the non-diagonal column indices which are stored in a given row. 
def yale_ja_at i yale_nd_row(i, :keys) end alias :yale_nd_row_as_array :yale_ja_at # call-seq: # yale_ja_set_at(i) -> Set # # Returns the non-diagonal column indices which are stored in a given row, as a Set. def yale_ja_set_at i require 'set' yale_nd_row(i, :keys).to_set end alias :yale_nd_row_as_set :yale_ja_set_at # call-seq: # yale_ja_sorted_set_at -> SortedSet # # Returns the non-diagonal column indices which are stored in a given row, as a Set. def yale_ja_sorted_set_at i require 'set' SortedSet.new(yale_nd_row(i, :keys)) end alias :yale_nd_row_as_sorted_set :yale_ja_sorted_set_at # call-seq: # yale_nd_row_as_hash(i) -> Hash # # Returns the non-diagonal column indices and entries stored in a given row. def yale_nd_row_as_hash i yale_nd_row(i, :hash) end # call-seq: # yale_ja_d_keys_at(i) -> Array # # Returns the diagonal and non-digonal column indices stored in a given row. def yale_ja_d_keys_at i ary = yale_nd_row(i, :keys) return ary if i >= self.shape[1] || self[i,i] == self.default_value ary << i end alias :yale_row_as_array :yale_ja_d_keys_at # call-seq: # yale_ja_d_keys_set_at(i) -> Set # # Returns the diagonal and non-diagonal column indices stored in a given row. def yale_ja_d_keys_set_at i require 'set' yale_ja_d_keys_at(i).to_set end alias :yale_row_as_set :yale_ja_d_keys_set_at # call-seq: # yale_ja_d_keys_sorted_set_at(i) -> SortedSet # # Returns the diagonal and non-diagonal column indices stored in a given row. def yale_ja_d_keys_sorted_set_at i require 'set' SortedSet.new(yale_row_as_array(i)) end alias :yale_row_as_sorted_set :yale_ja_d_keys_sorted_set_at # call-seq: # yale_row_as_hash(i) -> Hash # # Returns the diagonal and non-diagonal column indices and entries stored in a given row. 
def yale_row_as_hash i h = yale_nd_row(i, :hash) return h if i >= self.shape[1] || self[i,i] == self.default_value h[i] = self[i,i] end end ================================================ FILE: lib/nmatrix.rb ================================================ # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == nmatrix.rb # # This file is a stub that only loads the main NMatrix file. # require 'nmatrix/nmatrix.rb' ================================================ FILE: nmatrix-atlas.gemspec ================================================ lib = File.expand_path('../lib/', __FILE__) $:.unshift lib unless $:.include?(lib) require 'nmatrix/version' Gem::Specification.new do |gem| gem.name = "nmatrix-atlas" gem.version = NMatrix::VERSION::STRING gem.summary = "ATLAS backend for nmatrix" gem.description = "For using linear algebra fuctions provided by ATLAS" gem.homepage = 'http://sciruby.com' gem.authors = ['Will Levine', 'John Woods'] gem.email = ['john.o.woods@gmail.com'] gem.license = 'BSD-3-Clause' gem.files = ["lib/nmatrix/atlas.rb","lib/nmatrix/lapack_ext_common.rb"] gem.files += `git ls-files -- ext/nmatrix_atlas`.split("\n") gem.files += `git ls-files -- ext/nmatrix | grep ".h$"`.split("\n") #need nmatrix header files to compile gem.test_files = `git ls-files -- spec`.split("\n") gem.test_files -= `git ls-files -- spec/plugins`.split("\n") gem.test_files += `git ls-files -- 
spec/plugins/atlas`.split("\n") gem.extensions = ['ext/nmatrix_atlas/extconf.rb'] gem.require_paths = ["lib"] gem.required_ruby_version = '>= 1.9' gem.add_dependency 'nmatrix', NMatrix::VERSION::STRING end ================================================ FILE: nmatrix-fftw.gemspec ================================================ lib = File.expand_path('../lib/', __FILE__) $:.unshift lib unless $:.include?(lib) require 'nmatrix/version' Gem::Specification.new do |gem| gem.name = "nmatrix-fftw" gem.version = NMatrix::VERSION::STRING gem.summary = "FFTW backend for NMatrix" gem.description = "NMatrix extension for using fuctions provided by FFTW" gem.homepage = 'http://sciruby.com' gem.authors = ['Sameer Deshmukh', 'Magdalen Berns'] gem.email = ['sameer.deshmukh93@gmail.com', 'm.berns@thismagpie.com'] gem.license = 'BSD-3-Clause' gem.files = ["lib/nmatrix/fftw.rb"] gem.files += `git ls-files -- ext/nmatrix_fftw`.split("\n") gem.files += `git ls-files -- ext/nmatrix | grep ".h$"`.split("\n") #need nmatrix header files to compile gem.test_files = `git ls-files -- spec`.split("\n") gem.test_files -= `git ls-files -- spec/plugins`.split("\n") gem.test_files += `git ls-files -- spec/plugins/fftw`.split("\n") gem.extensions = ['ext/nmatrix_fftw/extconf.rb'] gem.require_paths = ["lib"] gem.required_ruby_version = '>= 1.9' gem.add_dependency 'nmatrix', NMatrix::VERSION::STRING end ================================================ FILE: nmatrix-lapacke.gemspec ================================================ lib = File.expand_path('../lib/', __FILE__) $:.unshift lib unless $:.include?(lib) require 'nmatrix/version' Gem::Specification.new do |gem| gem.name = "nmatrix-lapacke" gem.version = NMatrix::VERSION::STRING gem.summary = "general LAPACK backend for nmatrix using LAPACKE interface" gem.description = "For using linear algebra fuctions provided by LAPACK and BLAS" gem.homepage = 'http://sciruby.com' gem.authors = ['Will Levine', 'John Woods'] gem.email = 
['john.o.woods@gmail.com'] gem.license = 'BSD-3-Clause' gem.files = ["lib/nmatrix/lapacke.rb","lib/nmatrix/lapack_ext_common.rb"] gem.files += `git ls-files -- ext/nmatrix_lapacke`.split("\n") gem.files += `git ls-files -- ext/nmatrix | grep ".h$"`.split("\n") #need nmatrix header files to compile gem.test_files = `git ls-files -- spec`.split("\n") gem.test_files -= `git ls-files -- spec/plugins`.split("\n") gem.test_files += `git ls-files -- spec/plugins/lapacke`.split("\n") gem.extensions = ['ext/nmatrix_lapacke/extconf.rb'] gem.require_paths = ["lib"] gem.required_ruby_version = '>= 1.9' gem.add_dependency 'nmatrix', NMatrix::VERSION::STRING end ================================================ FILE: nmatrix.gemspec ================================================ lib = File.expand_path('../lib/', __FILE__) $:.unshift lib unless $:.include?(lib) require 'nmatrix/version' #get files that are used by plugins rather than the main nmatrix gem plugin_files = [] Dir["nmatrix-*.gemspec"].each do |gemspec_file| gemspec = eval(File.read(gemspec_file)) plugin_files += gemspec.files end plugin_lib_files = plugin_files.select { |file| file.match(/^lib\//) } Gem::Specification.new do |gem| gem.name = "nmatrix" gem.version = NMatrix::VERSION::STRING gem.summary = "NMatrix is a linear algebra library for Ruby" gem.description = "NMatrix is a linear algebra library for Ruby, written mostly in C and C++." gem.homepage = 'http://sciruby.com' gem.authors = ['John Woods', 'Chris Wailes', 'Aleksey Timin'] gem.email = ['john.o.woods@gmail.com'] gem.license = 'BSD-3-Clause' gem.post_install_message = <<-EOF *********************************************************** Welcome to SciRuby: Tools for Scientific Computing in Ruby! NMatrix requires a C/C++ compiler. Clang and GCC are recommended. JRuby support is experimental, and requires Java. If you are upgrading from NMatrix 0.1.0 and rely on ATLAS features, please check the README. 
Faster matrix calculations and more advanced linear algebra features are available by installing either the nmatrix-atlas or nmatrix-lapacke plugins. More explicit instructions for NMatrix and SciRuby should be available on the SciRuby website, sciruby.com, or through our mailing list (which can be found on our web- site). Thanks for trying out NMatrix! Happy coding! *********************************************************** EOF gem.files = `git ls-files -- ext/nmatrix`.split("\n") gem.files += `git ls-files -- lib`.split("\n") gem.files -= plugin_lib_files gem.test_files = `git ls-files -- spec`.split("\n") gem.test_files -= `git ls-files -- spec/plugins`.split("\n") gem.extensions = ['ext/nmatrix/extconf.rb'] gem.require_paths = ["lib"] gem.required_ruby_version = '>= 1.9' gem.add_dependency 'packable', '~> 1.3', '>= 1.3.5' gem.add_development_dependency 'bundler', '~>1.6' gem.add_development_dependency 'pry', '~>0.10' gem.add_development_dependency 'rake', '~>10.3' gem.add_development_dependency 'rake-compiler', '~>0.8' gem.add_development_dependency 'rdoc', '~>4.0', '>=4.0.1' gem.add_development_dependency 'rspec', '~>2.14' gem.add_development_dependency 'rspec-longrun', '~>1.0' end ================================================ FILE: scripts/benchmarks/nmatrix_creation.rb ================================================ ================================================ FILE: scripts/switch_lapack_ubuntu.rb ================================================ #!/usr/bin/env ruby if ARGV[0] == "atlas" lapack_prefix = "/usr/lib/atlas-base/atlas" blas_prefix = "/usr/lib/atlas-base/atlas" elsif ARGV[0] == "openblas" lapack_prefix = "/usr/lib/openblas-base" blas_prefix = "/usr/lib/openblas-base" elsif ARGV[0] == "ref" lapack_prefix = "/usr/lib/lapack" blas_prefix = "/usr/lib/libblas" else puts "options are atlas, openblas, or ref" exit end def run(cmd) puts "> #{cmd}" system cmd end run "update-alternatives --set liblapack.so.3 #{lapack_prefix}/liblapack.so.3" run 
"update-alternatives --set liblapack.so #{lapack_prefix}/liblapack.so" run "update-alternatives --set libblas.so.3 #{blas_prefix}/libblas.so.3" run "update-alternatives --set libblas.so #{blas_prefix}/libblas.so" ================================================ FILE: scripts/ttable_helper.rb ================================================ #!/usr/bin/ruby # A helper file for generating and maintaining template tables. DTYPES = [ :uint8_t, :int8_t, :int16_t, :int32_t, :int64_t, :float32_t, :float64_t, :'nm::Complex64', :'nm::Complex128', :'nm::RubyObject' ] def nullify(disabled = []) #:nodoc: DTYPES.map { |t| if disabled.include?(t) then :NULL else t end } end ITYPES = [ :uint8_t, :uint16_t, :uint32_t, :uint64_t ] EWOPS = [ :'nm::EW_ADD', :'nm::EW_SUB', :'nm::EW_MUL', :'nm::EW_DIV', :'nm::EW_POW', :'nm::EW_MOD', :'nm::EW_EQEQ', :'nm::EW_NEQ', :'nm::EW_LT', :'nm::EW_GT', :'nm::EW_LEQ', :'nm::EW_GEQ' ] LR_ALLOWED = { :uint8_t => DTYPES, :int8_t => DTYPES, :int16_t => DTYPES, :int32_t => DTYPES, :int64_t => DTYPES, :float32_t => DTYPES, :float64_t => DTYPES, :'nm::Complex64' => DTYPES, :'nm::Complex128' => DTYPES, :'nm::RubyObject' => DTYPES } lines = case ARGV[0] when 'OPLR' '{' + EWOPS.map do |op| '{' + DTYPES.map do |l_dtype| '{' + LR_ALLOWED[l_dtype].map do |r_dtype| if r_dtype == :NULL 'NULL' else "fun<#{op}, #{l_dtype}, #{r_dtype}>" end end.join(', ') + '}' end.join(",\n") + '}' end.join(",\n") + '}' when 'OPID' '{' + EWOPS.map do |op| '{' + ITYPES.map do |itype| '{' + DTYPES.map do |dtype| if dtype == :NULL 'NULL' else "fun<#{op}, #{itype}, #{dtype}>" end end.join(",") + '}' end.join(",\\\n") + '}' end.join(",\\\n") + '}' when 'LR' '{' + DTYPES.map do |l_dtype| '{' + LR_ALLOWED[l_dtype].map do |r_dtype| if r_dtype == :NULL 'NULL' else "fun<#{l_dtype}, #{r_dtype}>" end end.join(', ') + '}' end.join(",\n") + '}' end puts lines ================================================ FILE: spec/00_nmatrix_spec.rb ================================================ # = NMatrix 
# # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == 00_nmatrix_spec.rb # # Basic tests for NMatrix. These should load first, as they're # essential to NMatrix operation. # require 'spec_helper' describe NMatrix do it "creates a matrix with the new constructor" do n = NMatrix.new([2,2], [0,1,2,3], dtype: :int64) expect(n.shape).to eq([2,2]) expect(n.entries).to eq([0,1,2,3]) expect(n.dtype).to eq(:int64) end it "adequately requires information to access a single entry of a dense matrix" do n = NMatrix.new(:dense, 4, [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15], :float64) expect(n[0,0]).to eq(0) expect { n[0] }.to raise_error(ArgumentError) end it "calculates exact determinants on small dense matrices" do expect(NMatrix.new(2, [1,2,3,4], stype: :dense, dtype: :int64).det_exact).to eq(-2) expect(NMatrix.new(3, [1,2,3,0,5,6,7,8,0], stype: :dense, dtype: :int64) .det_exact).to eq(-69) end it "calculates exact determinants on small yale square matrices" do expect(NMatrix.new(2, [1,2,3,4], stype: :yale, dtype: :int64).det_exact).to eq(-2) expect(NMatrix.new(3, [1,2,3,0,5,6,7,8,0], stype: :yale, dtype: :int64) .det_exact).to eq(-69) end it "calculates exact determinants on small list square matrices" do expect(NMatrix.new(2, [1,2,3,4], stype: :list, dtype: :int64).det_exact).to eq(-2) expect(NMatrix.new(3, [1,2,3,0,5,6,7,8,0], stype: :list, dtype: :int64) .det_exact).to eq(-69) end it "calculates 
inverse exact determinants on small dense matrices" do pending("not yet implemented for NMatrix-JRuby") if jruby? a = NMatrix.new(3, [1,2,3,0,1,4,5,6,0], stype: :dense, dtype: :int64) inversed = a.method(:__inverse_exact__).call(a.clone, 3, 3) b = NMatrix.new(3, [-24,18,5,20,-15,-4,-5,4,1], stype: :dense, dtype: :int64) expect(inversed).to eq(b) c = NMatrix.new(3, [1,0,3,0,0,1,0,6,0], stype: :dense, dtype: :int64) inversed = c.method(:__inverse_exact__).call(c.clone, 3, 3) d = NMatrix.new(3, [1,-3,0,0,0,0,0,1,0], stype: :dense, dtype: :int64) expect(inversed).to eq(d) e = NMatrix.new(2, [3,1,2,1], stype: :dense, dtype: :int64) inversed = e.method(:__inverse_exact__).call(e.clone, 2, 2) f = NMatrix.new(2, [1,-1,-2,3], stype: :dense, dtype: :int64) expect(inversed).to eq(f) end it "calculates inverse exact determinants on small yale matrices" do pending("not yet implemented for NMatrix-JRuby") if jruby? a = NMatrix.new(3, [1,2,3,0,1,4,5,6,0], stype: :yale, dtype: :int64) inversed = a.method(:__inverse_exact__).call(a.clone, 3, 3) b = NMatrix.new(3, [-24,18,5,20,-15,-4,-5,4,1], stype: :yale, dtype: :int64) expect(inversed).to eq(b) c = NMatrix.new(3, [1,0,3,0,0,1,0,6,0], stype: :yale, dtype: :int64) inversed = c.method(:__inverse_exact__).call(c.clone, 3, 3) d = NMatrix.new(3, [1,-3,0,0,0,0,0,1,0], stype: :yale, dtype: :int64) expect(inversed).to eq(d) e = NMatrix.new(2, [3,1,2,1], stype: :yale, dtype: :int64) inversed = e.method(:__inverse_exact__).call(e.clone, 2, 2) f = NMatrix.new(2, [1,-1,-2,3], stype: :yale, dtype: :int64) expect(inversed).to eq(f) end it "calculates inverse exact determinants on small list matrices" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
a = NMatrix.new(3, [1,2,3,0,1,4,5,6,0], stype: :list, dtype: :int64) inversed = a.method(:__inverse_exact__).call(a.clone, 3, 3) b = NMatrix.new(3, [-24,18,5,20,-15,-4,-5,4,1], stype: :list, dtype: :int64) expect(inversed).to eq(b) c = NMatrix.new(2, [3,1,2,1], stype: :list, dtype: :int64) inversed = c.method(:__inverse_exact__).call(c.clone, 2, 2) d = NMatrix.new(2, [1,-1,-2,3], stype: :list, dtype: :int64) expect(inversed).to eq(d) end it "calculates determinants" do expect(NMatrix.new(3, [-2,2,3,-1,1,3,2,0,-1], stype: :dense, dtype: :int64).det).to eq(6) end it "allows casting to Ruby objects" do m = NMatrix.new([3,3], [0,0,1,0,2,0,3,4,5], dtype: :int64, stype: :dense) n = m.cast(:dense, :object) expect(n).to eq(m) end it "allows casting from Ruby objects" do pending("not yet implemented for NMatrix-JRuby") if jruby? m = NMatrix.new(:dense, [3,3], [0,0,1,0,2,0,3,4,5], :object) n = m.cast(:dense, :int64) expect(m).to eq(n) end it "allows stype casting of a dim 2 matrix between dense, sparse, and list (different dtypes)" do m = NMatrix.new(:dense, [3,3], [0,0,1,0,2,0,3,4,5], :int64). cast(:yale, :int32). cast(:dense, :float64). cast(:list, :object). cast(:dense, :int16). cast(:list, :int32). cast(:yale, :int64) #. #cast(:list, :int32). #cast(:dense, :int16) #m.should.equal?(original) # For some reason this causes some weird garbage collector problems when we uncomment these. The above lines won't # work at all in IRB, but work fine when run in a regular Ruby session. end it "fills dense Ruby object matrix with nil" do n = NMatrix.new([4,3], dtype: :object) pending("not yet implemented for object dtype for NMatrix-JRuby") if jruby? 
expect(n[0,0]).to eq(nil) end it "fills dense with individual assignments" do n = NMatrix.new([4,3], dtype: :float64) n[0,0] = 14.0 n[0,1] = 9.0 n[0,2] = 3.0 n[1,0] = 2.0 n[1,1] = 11.0 n[1,2] = 15.0 n[2,0] = 0.0 n[2,1] = 12.0 n[2,2] = 17.0 n[3,0] = 5.0 n[3,1] = 2.0 n[3,2] = 3.0 expect(n[0,0]).to eq(14.0) expect(n[0,1]).to eq(9.0) expect(n[0,2]).to eq(3.0) expect(n[1,0]).to eq(2.0) expect(n[1,1]).to eq(11.0) expect(n[1,2]).to eq(15.0) expect(n[2,0]).to eq(0.0) expect(n[2,1]).to eq(12.0) expect(n[2,2]).to eq(17.0) expect(n[3,0]).to eq(5.0) expect(n[3,1]).to eq(2.0) expect(n[3,2]).to eq(3.0) end it "fills dense with a single mass assignment" do n = NMatrix.new([4,3], [14.0, 9.0, 3.0, 2.0, 11.0, 15.0, 0.0, 12.0, 17.0, 5.0, 2.0, 3.0]) expect(n[0,0]).to eq(14.0) expect(n[0,1]).to eq(9.0) expect(n[0,2]).to eq(3.0) expect(n[1,0]).to eq(2.0) expect(n[1,1]).to eq(11.0) expect(n[1,2]).to eq(15.0) expect(n[2,0]).to eq(0.0) expect(n[2,1]).to eq(12.0) expect(n[2,2]).to eq(17.0) expect(n[3,0]).to eq(5.0) expect(n[3,1]).to eq(2.0) expect(n[3,2]).to eq(3.0) end it "fills dense with a single mass assignment, with dtype specified" do m = NMatrix.new([4,3], [14.0, 9.0, 3.0, 2.0, 11.0, 15.0, 0.0, 12.0, 17.0, 5.0, 2.0, 3.0], dtype: :float32) expect(m[0,0]).to eq(14.0) expect(m[0,1]).to eq(9.0) expect(m[0,2]).to eq(3.0) expect(m[1,0]).to eq(2.0) expect(m[1,1]).to eq(11.0) expect(m[1,2]).to eq(15.0) expect(m[2,0]).to eq(0.0) expect(m[2,1]).to eq(12.0) expect(m[2,2]).to eq(17.0) expect(m[3,0]).to eq(5.0) expect(m[3,1]).to eq(2.0) expect(m[3,2]).to eq(3.0) end it "dense handles missing initialization value" do n = NMatrix.new(3, dtype: :int8) pending("not yet implemented for int dtype for NMatrix-JRuby") if jruby? 
expect(n.stype).to eq(:dense) expect(n.dtype).to eq(:int8) m = NMatrix.new(4, dtype: :float64) expect(m.stype).to eq(:dense) expect(m.dtype).to eq(:float64) end [:dense, :list, :yale].each do |storage_type| context storage_type do it "can be duplicated" do n = NMatrix.new([2,3], 1.1, stype: storage_type, dtype: :float64) # FIXME pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? #and storage_type != :dense expect(n.stype).to eq(storage_type) n[0,0] = 0.0 n[0,1] = 0.1 n[1,0] = 1.0 m = n.dup expect(m.shape).to eq(n.shape) expect(m.dim).to eq(n.dim) expect(m.object_id).not_to eq(n.object_id) expect(m.stype).to eq(storage_type) expect(m[0,0]).to eq(n[0,0]) m[0,0] = 3.0 expect(m[0,0]).not_to eq(n[0,0]) end it "enforces shape boundaries" do expect { NMatrix.new([1,10], 0, dtype: :int8, stype: storage_type, default: 0)[1,0] }.to raise_error(RangeError) expect { NMatrix.new([1,10], 0, dtype: :int8, stype: storage_type, default: 0)[0,10] }.to raise_error(RangeError) end it "sets and gets" do n = NMatrix.new(2, 0, stype: storage_type, dtype: :int8) n[0,1] = 1 expect(n[0,0]).to eq(0) expect(n[1,0]).to eq(0) expect(n[0,1]).to eq(1) expect(n[1,1]).to eq(0) end it "sets and gets references" do n = NMatrix.new(2, stype: storage_type, dtype: :int8, default: 0) expect(n[0,1] = 1).to eq(1) expect(n[0,1]).to eq(1) end # Tests Ruby object versus any C dtype (in this case we use :int64) [:object, :int64].each do |dtype| c = dtype == :object ? 
"Ruby object" : "non-Ruby object" context c do it "allows iteration of matrices" do n = nil if storage_type == :dense n = NMatrix.new(:dense, [3,3], [1,2,3,4,5,6,7,8,9], dtype) else n = NMatrix.new([3,4], 0, stype: storage_type, dtype: dtype) n[0,0] = 1 n[0,1] = 2 n[2,3] = 4 n[2,0] = 3 end ary = [] n.each do |x| ary << x end if storage_type == :dense expect(ary).to eq([1,2,3,4,5,6,7,8,9]) else expect(ary).to eq([1,2,0,0,0,0,0,0,3,0,0,4]) end end it "allows storage-based iteration of matrices" do pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? and storage_type != :dense STDERR.puts storage_type.inspect STDERR.puts dtype.inspect n = NMatrix.new([3,3], 0, stype: storage_type, dtype: dtype) n[0,0] = 1 n[0,1] = 2 n[2,0] = 5 if storage_type == :yale n[2,1] = 4 n[2,2] = 3 values = [] is = [] js = [] n.each_stored_with_indices do |v,i,j| values << v is << i js << j end if storage_type == :yale expect(is).to eq([0,1,2,0,2,2]) expect(js).to eq([0,1,2,1,0,1]) expect(values).to eq([1,0,3,2,5,4]) elsif storage_type == :list expect(values).to eq([1,2,4,3]) expect(is).to eq([0,0,2,2]) expect(js).to eq([0,1,1,2]) elsif storage_type == :dense expect(values).to eq([1,2,0,0,0,0,0,4,3]) expect(is).to eq([0,0,0,1,1,1,2,2,2]) expect(js).to eq([0,1,2,0,1,2,0,1,2]) end end end end end # dense and list, not yale context "(storage: #{storage_type})" do it "gets default value" do pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? 
expect(NMatrix.new(3, 0, stype: storage_type)[1,1]).to eq(0) expect(NMatrix.new(3, 0.1, stype: storage_type)[1,1]).to eq(0.1) expect(NMatrix.new(3, 1, stype: storage_type)[1,1]).to eq(1) end it "returns shape and dim" do expect(NMatrix.new([3,2,8], 0, stype: storage_type).shape).to eq([3,2,8]) expect(NMatrix.new([3,2,8], 0, stype: storage_type).dim).to eq(3) end it "returns number of rows and columns" do expect(NMatrix.new([7, 4], 3, stype: storage_type).rows).to eq(7) expect(NMatrix.new([7, 4], 3, stype: storage_type).cols).to eq(4) end end unless storage_type == :yale end it "handles dense construction" do expect(NMatrix.new(3,0)[1,1]).to eq(0) expect(lambda { NMatrix.new(3,dtype: :int8)[1,1] }).to_not raise_error end it "converts from list to yale properly" do m = NMatrix.new(3, 0, stype: :list) m[0,2] = 333 m[2,2] = 777 n = m.cast(:yale, :int32) #puts n.capacity #n.extend NMatrix::YaleFunctions #puts n.yale_ija.inspect #puts n.yale_a.inspect expect(n[0,0]).to eq(0) expect(n[0,1]).to eq(0) expect(n[0,2]).to eq(333) expect(n[1,0]).to eq(0) expect(n[1,1]).to eq(0) expect(n[1,2]).to eq(0) expect(n[2,0]).to eq(0) expect(n[2,1]).to eq(0) expect(n[2,2]).to eq(777) end it "should return an enumerator when each is called without a block" do a = NMatrix.new(2, 1) b = NMatrix.new(2, [-1,0,1,0]) enums = [a.each, b.each] begin atans = [] atans << Math.atan2(*enums.map(&:next)) while true rescue StopIteration end end context "dense" do it "should return the matrix being iterated over when each is called with a block" do # FIXME pending("not yet implemented for NMatrix-JRuby") if jruby? a = NMatrix.new(2, 1) val = (a.each { }) expect(val).to eq(a) end it "should return the matrix being iterated over when each_stored_with_indices is called with a block" do # FIXME pending("not yet implemented for NMatrix-JRuby") if jruby? 
a = NMatrix.new(2,1) val = (a.each_stored_with_indices { }) expect(val).to eq(a) end end [:list, :yale].each do |storage_type| context storage_type do it "should return the matrix being iterated over when each_stored_with_indices is called with a block" do pending("not yet implemented for Complex dtype for NMatrix-JRuby") if jruby? n = NMatrix.new([2,3], 1.1, stype: storage_type, dtype: :float64, default: 0) val = (n.each_stored_with_indices { }) expect(val).to eq(n) end it "should return an enumerator when each_stored_with_indices is called without a block" do pending("not yet implemented for Complex dtype for NMatrix-JRuby") if jruby? n = NMatrix.new([2,3], 1.1, stype: storage_type, dtype: :float64, default: 0) val = n.each_stored_with_indices expect(val).to be_a Enumerator end end end it "should iterate through element 256 without a segfault" do t = NVector.random(256) t.each { |x| x + 0 } end end describe 'NMatrix' do context "#upper_triangle" do it "should create a copy with the lower corner set to zero" do n = NMatrix.seq(4)+1 expect(n.upper_triangle).to eq(NMatrix.new(4, [1,2,3,4,0,6,7,8,0,0,11,12,0,0,0,16])) expect(n.upper_triangle(2)).to eq(NMatrix.new(4, [1,2,3,4,5,6,7,8,9,10,11,12,0,14,15,16])) end end context "#lower_triangle" do it "should create a copy with the lower corner set to zero" do n = NMatrix.seq(4)+1 expect(n.lower_triangle).to eq(NMatrix.new(4, [1,0,0,0,5,6,0,0,9,10,11,0,13,14,15,16])) expect(n.lower_triangle(2)).to eq(NMatrix.new(4, [1,2,3,0,5,6,7,8,9,10,11,12,13,14,15,16])) end end context "#upper_triangle!" do it "should create a copy with the lower corner set to zero" do n = NMatrix.seq(4)+1 expect(n.upper_triangle!).to eq(NMatrix.new(4, [1,2,3,4,0,6,7,8,0,0,11,12,0,0,0,16])) n = NMatrix.seq(4)+1 expect(n.upper_triangle!(2)).to eq(NMatrix.new(4, [1,2,3,4,5,6,7,8,9,10,11,12,0,14,15,16])) end end context "#lower_triangle!" 
do it "should create a copy with the lower corner set to zero" do n = NMatrix.seq(4)+1 expect(n.lower_triangle!).to eq(NMatrix.new(4, [1,0,0,0,5,6,0,0,9,10,11,0,13,14,15,16])) n = NMatrix.seq(4)+1 expect(n.lower_triangle!(2)).to eq(NMatrix.new(4, [1,2,3,0,5,6,7,8,9,10,11,12,13,14,15,16])) end end context "#rank" do it "should get the rank of a 2-dimensional matrix" do n = NMatrix.seq([2,3]) expect(n.rank(0, 0)).to eq(N[[0,1,2]]) end it "should raise an error when the rank is out of bounds" do n = NMatrix.seq([2,3]) expect { n.rank(2, 0) }.to raise_error(RangeError) end end context "#reshape" do it "should change the shape of a matrix without the contents changing" do # FIXME pending("not yet implemented for NMatrix-JRuby") if jruby? n = NMatrix.seq(4)+1 expect(n.reshape([8,2]).to_flat_array).to eq(n.to_flat_array) end it "should permit a change of dimensionality" do # FIXME pending("not yet implemented for NMatrix-JRuby") if jruby? n = NMatrix.seq(4)+1 expect(n.reshape([8,1,2]).to_flat_array).to eq(n.to_flat_array) end it "should prevent a resize" do n = NMatrix.seq(4)+1 expect { n.reshape([5,2]) }.to raise_error(ArgumentError) end it "should do the reshape operation in place" do n = NMatrix.seq(4)+1 expect(n.reshape!([8,2]).eql?(n)).to eq(true) # because n itself changes end it "should do the reshape operation in place, changing dimension" do # FIXME pending("not yet implemented for NMatrix-JRuby") if jruby? n = NMatrix.seq(4) a = n.reshape!([4,2,2]) expect(n).to eq(NMatrix.seq([4,2,2])) expect(a).to eq(NMatrix.seq([4,2,2])) end it "reshape and reshape! must produce same result" do # FIXME pending("not yet implemented for NMatrix-JRuby") if jruby? 
n = NMatrix.seq(4)+1 a = NMatrix.seq(4)+1 expect(n.reshape!([8,2])==a.reshape(8,2)).to eq(true) # because n itself changes end it "should prevent a resize in place" do n = NMatrix.seq(4)+1 expect { n.reshape!([5,2]) }.to raise_error(ArgumentError) end end context "#transpose" do [:dense, :list, :yale].each do |stype| context(stype) do it "should transpose a #{stype} matrix (2-dimensional)" do n = NMatrix.seq(4, stype: stype) expect(n.transpose.to_a.flatten).to eq([0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15]) end end end [:dense, :list].each do |stype| context(stype) do it "should transpose a #{stype} matrix (3-dimensional)" do n = NMatrix.new([4,4,1], [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15], stype: stype) expect(n.transpose([2,1,0]).to_flat_array).to eq([0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15]) expect(n.transpose([1,0,2]).to_flat_array).to eq([0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15]) expect(n.transpose([0,2,1]).to_flat_array).to eq(n.to_flat_array) # for dense, make this reshape! end end it "should just copy a 1-dimensional #{stype} matrix" do n = NMatrix.new([3], [1,2,3], stype: stype) expect(n.transpose).to eq n expect(n.transpose).not_to be n end it "should check permute argument if supplied for #{stype} matrix" do n = NMatrix.new([2,2], [1,2,3,4], stype: stype) expect{n.transpose *4 }.to raise_error(ArgumentError) expect{n.transpose [1,1,2] }.to raise_error(ArgumentError) end end end context "#dot_product" do [:dense].each do |stype| # list storage transpose not yet implemented context(stype) do # yale support only 2-dim matrix it "should work like vector product on a #{stype} (1-dimensional)" do # FIXME pending("not yet implemented for NMatrix-JRuby") if jruby? 
m = NMatrix.new([3], [1,2,3], stype: stype) expect(m.dot(m)).to eq (NMatrix.new([1],[14])) end end end end context "#==" do [:dense, :list, :yale].each do |left| [:dense, :list, :yale].each do |right| context ("#{left}?#{right}") do it "tests equality of two equal matrices" do n = NMatrix.new([3,4], [0,0,1,2,0,0,3,4,0,0,0,0], stype: left) m = NMatrix.new([3,4], [0,0,1,2,0,0,3,4,0,0,0,0], stype: right) expect(n==m).to eq(true) end it "tests equality of two unequal matrices" do n = NMatrix.new([3,4], [0,0,1,2,0,0,3,4,0,0,0,1], stype: left) m = NMatrix.new([3,4], [0,0,1,2,0,0,3,4,0,0,0,0], stype: right) expect(n==m).to eq(false) end it "tests equality of matrices with different shapes" do n = NMatrix.new([2,2], [1,2, 3,4], stype: left) m = NMatrix.new([2,3], [1,2, 3,4, 5,6], stype: right) x = NMatrix.new([1,4], [1,2, 3,4], stype: right) expect{n==m}.to raise_error(ShapeError) expect{n==x}.to raise_error(ShapeError) end it "tests equality of matrices with different dimension" do n = NMatrix.new([2,1], [1,2], stype: left) m = NMatrix.new([2], [1,2], stype: right) expect{n==m}.to raise_error(ShapeError) end if left != :yale && right != :yale # yale must have dimension 2 end end end end context "#concat" do it "should default to horizontal concatenation" do n = NMatrix.new([1,3], [1,2,3]) expect(n.concat(n)).to eq(NMatrix.new([1,6], [1,2,3,1,2,3])) end it "should permit vertical concatenation" do n = NMatrix.new([1,3], [1,2,3]) expect(n.vconcat(n)).to eq(NMatrix.new([2,3], [1,2,3])) end it "should permit depth concatenation on tensors" do # FIXME pending("not yet implemented for NMatrix-JRuby") if jruby? n = NMatrix.new([1,3,1], [1,2,3]) expect(n.dconcat(n)).to eq(NMatrix.new([1,3,2], [1,1,2,2,3,3])) end it "should work on matrices with different size along concat dim" do n = N[[1, 2, 3], [4, 5, 6]] m = N[[7], [8]] # FIXME pending("not yet implemented for NMatrix-JRuby") if jruby? 
expect(n.hconcat(m)).to eq N[[1, 2, 3, 7], [4, 5, 6, 8]] expect(m.hconcat(n)).to eq N[[7, 1, 2, 3], [8, 4, 5, 6]] end it "should work on matrices with different size along concat dim" do n = N[[1, 2, 3], [4, 5, 6]] m = N[[7, 8, 9]] # FIXME pending("not yet implemented for NMatrix-JRuby") if jruby? expect(n.vconcat(m)).to eq N[[1, 2, 3], [4, 5, 6], [7, 8, 9]] expect(m.vconcat(n)).to eq N[[7, 8, 9], [1, 2, 3], [4, 5, 6]] end end context "#[]" do it "should return values based on indices" do n = NMatrix.new([2,5], [1,2,3,4,5,6,7,8,9,0]) expect(n[1,0]).to eq 6 expect(n[1,0..3]).to eq NMatrix.new([1,4],[6,7,8,9]) end it "should work for negative indices" do n = NMatrix.new([1,5], [1,2,3,4,5]) expect(n[-1]).to eq(5) expect(n[0,0..-2]).to eq(NMatrix.new([1,4],[1,2,3,4])) end end context "#complex_conjugate!" do [:dense, :yale, :list].each do |stype| context(stype) do it "should work in-place for complex dtypes" do pending("not yet implemented for list stype") if stype == :list pending("not yet implemented for Complex dtype for NMatrix-JRuby") if jruby? n = NMatrix.new([2,3], [Complex(2,3)], stype: stype, dtype: :complex128) n.complex_conjugate! expect(n).to eq(NMatrix.new([2,3], [Complex(2,-3)], stype: stype, dtype: :complex128)) end [:object, :int64].each do |dtype| it "should work in-place for non-complex dtypes" do pending("not yet implemented for list stype") if stype == :list pending("not yet implemented for Complex dtype for NMatrix-JRuby") if jruby? n = NMatrix.new([2,3], 1, stype: stype, dtype: dtype) n.complex_conjugate! expect(n).to eq(NMatrix.new([2,3], [1], stype: stype, dtype: dtype)) end end end end end context "#complex_conjugate" do [:dense, :yale, :list].each do |stype| context(stype) do it "should work out-of-place for complex dtypes" do pending("not yet implemented for list stype") if stype == :list pending("not yet implemented for Complex dtype for NMatrix-JRuby") if jruby? 
n = NMatrix.new([2,3], [Complex(2,3)], stype: stype, dtype: :complex128) expect(n.complex_conjugate).to eq(NMatrix.new([2,3], [Complex(2,-3)], stype: stype, dtype: :complex128)) end [:object, :int64].each do |dtype| it "should work out-of-place for non-complex dtypes" do pending("not yet implemented for list stype") if stype == :list pending("not yet implemented for Complex dtype for NMatrix-JRuby") if jruby? n = NMatrix.new([2,3], 1, stype: stype, dtype: dtype) expect(n.complex_conjugate).to eq(NMatrix.new([2,3], [1], stype: stype, dtype: dtype)) end end end end end context "#inject" do it "should sum columns of yale matrix correctly" do n = NMatrix.new([4, 3], stype: :yale, default: 0) n[0,0] = 1 n[1,1] = 2 n[2,2] = 4 n[3,2] = 8 column_sums = [] n.cols.times do |i| column_sums << n.col(i).inject(:+) end expect(column_sums).to eq([1, 2, 12]) end end context "#index" do it "returns index of first occurence of an element for a vector" do n = NMatrix.new([5], [0,22,22,11,11]) expect(n.index(22)).to eq([1]) end it "returns index of first occurence of an element for 2-D matrix" do n = NMatrix.new([3,3], [23,11,23, 44, 2, 0, 33, 0, 32]) expect(n.index(0)).to eq([1,2]) end it "returns index of first occerence of an element for N-D matrix" do n = NMatrix.new([3,3,3], [23,11,23, 44, 2, 0, 33, 0, 32, 23,11,23, 44, 2, 0, 33, 0, 32, 23,11,23, 44, 2, 0, 33, 0, 32]) expect(n.index(44)).to eq([0,1,0]) end end context "#last" do it "returns the last element of a 1-dimensional NMatrix" do n = NMatrix.new([1,4], [1,2,3,4]) expect(n.last).to eq(4) end it "returns the last element of a 2-dimensional NMatrix" do n = NMatrix.new([2,2], [4,8,12,16]) expect(n.last).to eq(16) end it "returns the last element of a 3-dimensional NMatrix" do n = NMatrix.new([2,2,2], [1,2,3,4,5,6,7,8]) expect(n.last).to eq(8) end end context "#diagonal" do ALL_DTYPES.each do |dtype| before do @square_matrix = NMatrix.new([3,3], [ 23,11,23, 44, 2, 0, 33, 0, 32 ], dtype: dtype ) @rect_matrix = 
NMatrix.new([4,3], [ 23,11,23, 44, 2, 0, 33, 0,32, 11,22,33 ], dtype: dtype ) end it "returns main diagonal for square matrix" do expect(@square_matrix.diagonal).to eq(NMatrix.new [3], [23,2,32]) end it "returns main diagonal for rectangular matrix" do expect(@rect_matrix.diagonal).to eq(NMatrix.new [3], [23,2,32]) end it "returns anti-diagonal for square matrix" do expect(@square_matrix.diagonal(false)).to eq(NMatrix.new [3], [23,2,33]) end it "returns anti-diagonal for rectangular matrix" do expect(@square_matrix.diagonal(false)).to eq(NMatrix.new [3], [23,2,33]) end end end context "#repeat" do before do @sample_matrix = NMatrix.new([2, 2], [1, 2, 3, 4]) end it "checks count argument" do expect{@sample_matrix.repeat(1, 0)}.to raise_error(ArgumentError) expect{@sample_matrix.repeat(-2, 0)}.to raise_error(ArgumentError) end it "returns repeated matrix" do pending("Not yet implemented for NMatrix JRuby") if jruby? expect(@sample_matrix.repeat(2, 0)).to eq(NMatrix.new([4, 2], [1, 2, 3, 4, 1, 2, 3, 4])) expect(@sample_matrix.repeat(2, 1)).to eq(NMatrix.new([2, 4], [1, 2, 1, 2, 3, 4, 3, 4])) end it "preserves dtype" do # FIXME pending("not yet implemented for NMatrix-JRuby") if jruby? 
expect(@sample_matrix.repeat(2, 0).dtype).to eq(@sample_matrix.dtype) expect(@sample_matrix.repeat(2, 1).dtype).to eq(@sample_matrix.dtype) end end context "#meshgrid" do before do @x, @y, @z = [1, 2, 3], NMatrix.new([2, 1], [4, 5]), [6, 7] @two_dim = NMatrix.new([2, 2], [1, 2, 3, 4]) @two_dim_array = [[4], [5]] @expected_result = [NMatrix.new([2, 3], [1, 2, 3, 1, 2, 3]), NMatrix.new([2, 3], [4, 4, 4, 5, 5, 5])] @expected_for_ij = [NMatrix.new([3, 2], [1, 1, 2, 2, 3, 3]), NMatrix.new([3, 2], [4, 5, 4, 5, 4, 5])] @expected_for_sparse = [NMatrix.new([1, 3], [1, 2, 3]), NMatrix.new([2, 1], [4, 5])] @expected_for_sparse_ij = [NMatrix.new([3, 1], [1, 2, 3]), NMatrix.new([1, 2], [4, 5])] # FIXME @expected_3dim = [NMatrix.new([1, 3, 1], [1, 2, 3]).repeat(2, 0).repeat(2, 2), NMatrix.new([2, 1, 1], [4, 5]).repeat(3, 1).repeat(2, 2), NMatrix.new([1, 1, 2], [6, 7]).repeat(2, 0).repeat(3, 1)] unless jruby? @expected_3dim_sparse_ij = [NMatrix.new([3, 1, 1], [1, 2, 3]), NMatrix.new([1, 2, 1], [4, 5]), NMatrix.new([1, 1, 2], [6, 7])] end it "checks arrays count" do pending("Not yet implemented for NMatrix JRuby") if jruby? expect{NMatrix.meshgrid([@x])}.to raise_error(ArgumentError) expect{NMatrix.meshgrid([])}.to raise_error(ArgumentError) end it "flattens input arrays before use" do pending("Not yet implemented for NMatrix JRuby") if jruby? expect(NMatrix.meshgrid([@two_dim, @two_dim_array])).to eq(NMatrix.meshgrid([@two_dim.to_flat_array, @two_dim_array.flatten])) end it "returns new NMatrixes" do pending("Not yet implemented for NMatrix JRuby") if jruby? expect(NMatrix.meshgrid([@x, @y])).to eq(@expected_result) end it "has option :sparse" do pending("Not yet implemented for NMatrix JRuby") if jruby? expect(NMatrix.meshgrid([@x, @y], sparse: true)).to eq(@expected_for_sparse) end it "has option :indexing" do pending("Not yet implemented for NMatrix JRuby") if jruby? 
expect(NMatrix.meshgrid([@x, @y], indexing: :ij)).to eq(@expected_for_ij) expect(NMatrix.meshgrid([@x, @y], indexing: :xy)).to eq(@expected_result) expect{NMatrix.meshgrid([@x, @y], indexing: :not_ij_not_xy)}.to raise_error(ArgumentError) end it "works well with both options set" do pending("Not yet implemented for NMatrix JRuby") if jruby? expect(NMatrix.meshgrid([@x, @y], sparse: true, indexing: :ij)).to eq(@expected_for_sparse_ij) end it "is able to take more than two arrays as arguments and works well with options" do pending("Not yet implemented for NMatrix JRuby") if jruby? expect(NMatrix.meshgrid([@x, @y, @z])).to eq(@expected_3dim) expect(NMatrix.meshgrid([@x, @y, @z], sparse: true, indexing: :ij)).to eq(@expected_3dim_sparse_ij) end end end ================================================ FILE: spec/01_enum_spec.rb ================================================ # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == 01_enum_spec.rb # # Enumerator tests for NMatrix. These should load early, as they # test functionality essential to matrix printing. 
# require 'spec_helper' describe "NMatrix enumeration for" do [:dense, :yale, :list].each do |stype| context stype do let(:n) { create_rectangular_matrix(stype) } let(:m) { n[1..4,1..3] } if stype == :yale it "should iterate properly along each row of a slice" do pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? vv = [] ii = [] jj = [] m.extend NMatrix::YaleFunctions m.each_row do |row| row.each_with_indices do |v,i,j| vv << v ii << i jj << j end end expect(vv).to eq([7,8,9, 12,13,0, 0,0,0, 0,17,18]) expect(ii).to eq([0]*12) expect(jj).to eq([0,1,2]*4) end it "should iterate along diagonal portion of A array" do pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? vv = [] ii = [] jj = [] n.send :__yale_stored_diagonal_each_with_indices__ do |v,i,j| vv << v ii << i jj << j end expect(vv).to eq([1,7,13,0,19]) expect(ii).to eq([0,1,2,3,4]) expect(jj).to eq(ii) end it "should iterate along non-diagonal portion of A array" do pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? vv = [] ii = [] jj = [] n.send :__yale_stored_nondiagonal_each_with_indices__ do |v,i,j| vv << v ii << i jj << j end expect(vv).to eq([2,3,4,5, 6,8,9,10, 11,12,14,15, 16,17,18,20]) expect(ii).to eq([[0]*4, [1]*4, [2]*4, [4]*4].flatten) expect(jj).to eq([1,2,3,4, 0,2,3,5, 0,1,4,5, 0,2,3,5]) end it "should iterate along a sliced diagonal portion of an A array" do pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? m = n[0..3,1..3] vv = [] ii = [] jj = [] m.send :__yale_stored_diagonal_each_with_indices__ do |v,i,j| vv << v ii << i jj << j end expect(vv).to eq([7,13,0]) expect(ii).to eq([1,2,3]) expect(jj).to eq([0,1,2]) end it "should iterate along a sliced non-diagonal portion of a sliced A array" do pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? 
vv = [] ii = [] jj = [] n.extend NMatrix::YaleFunctions m.extend NMatrix::YaleFunctions m.send :__yale_stored_nondiagonal_each_with_indices__ do |v,i,j| vv << v ii << i jj << j end expect(ii).to eq([0,0, 1, 3,3 ]) expect(jj).to eq([1,2, 0, 1,2 ]) expect(vv).to eq([8,9, 12, 17,18]) end it "should visit each stored element of the matrix in order by indices" do pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? vv = [] ii = [] jj = [] n.each_ordered_stored_with_indices do |v,i,j| vv << v ii << i jj << j end expect(vv).to eq([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 16, 17, 18, 19, 20]) expect(ii).to eq([[0]*5, [1]*5, [2]*5, [3]*1, [4]*5].flatten) expect(jj).to eq([0,1,2,3,4, 0,1,2,3,5, 0,1,2,4,5, 3, 0,2,3,4,5]) end it "should visit each stored element of the slice in order by indices" do pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? vv = [] ii = [] jj = [] m.each_ordered_stored_with_indices do |v,i,j| vv << v ii << i jj << j end expect(ii).to eq([0,0,0, 1,1, 2, 3,3 ]) expect(jj).to eq([0,1,2, 0,1, 2, 1,2 ]) expect(vv).to eq([7,8,9, 12,13, 0, 17,18 ]) end end it "should visit each cell in the matrix as if dense, making indices available" do vv = [] ii = [] jj = [] n.each_with_indices do |v,i,j| vv << v ii << i jj << j end expect(vv).to eq([1,2,3,4,5,0,6,7,8,9,0,10,11,12,13,0,14,15,0,0,0,0,0,0,16,0,17,18,19,20]) expect(ii).to eq([[0]*6, [1]*6, [2]*6, [3]*6, [4]*6].flatten) expect(jj).to eq([0,1,2,3,4,5]*5) end it "should visit each cell in the slice as if dense, making indices available" do vv = [] ii = [] jj = [] m.each_with_indices do |v,i,j| vv << v ii << i jj << j end expect(jj).to eq([0,1,2]*4) expect(ii).to eq([[0]*3, [1]*3, [2]*3, [3]*3].flatten) expect(vv).to eq([7,8,9,12,13,0,0,0,0,0,17,18]) end if stype == :list or stype == :dense then it "should correctly map to a matrix with a single element" do nm = N.new([1], [2.0], stype: stype) expect(nm.map { |e| e**2 }).to eq N.new([1], [4.0], 
stype: stype) end it "should correctly map to a matrix with multiple elements" do nm = N.new([2], [2.0, 2.0], stype: stype) expect(nm.map { |e| e**2 }).to eq N.new([2], [4.0, 4.0], stype: stype) end end end end end ================================================ FILE: spec/02_slice_spec.rb ================================================ # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == 02_slice_spec.rb # # Test of slice operations. High priority tests since reference # slicing is needed for pretty_print. # require 'spec_helper' describe "Slice operation" do include RSpec::Longrun::DSL [:dense, :list, :yale].each do |stype| context "for #{stype}" do #GC.start # don't have to do this, but it helps to make sure we've cleaned up our pointers properly. let(:stype_matrix) { create_matrix(stype) } it "should correctly return a row of a reference-slice" do n = create_rectangular_matrix(stype) stype_matrix = n[1..4,1..3] expect(stype_matrix.row(1, :copy)).to eq(stype_matrix.row(1, :reference)) expect(stype_matrix.row(1, :copy).to_flat_array).to eq([12,13,0]) end if stype == :yale it "should binary search for the left boundary of a partial row of stored indices correctly" do #FIXME pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? 
n = NMatrix.new(10, stype: :yale, dtype: :int32) n[3,0] = 1 #n[3,2] = 2 n[3,3] = 3 n[3,4] = 4 n[3,6] = 5 n[3,8] = 6 n[3,9] = 7 vs = [] is = [] js = [] n[3,1..9].each_stored_with_indices do |v,i,j| vs << v is << i js << j end expect(vs).to eq([3,4,5,6,7]) expect(js).to eq([2,3,5,7,8]) expect(is).to eq([0,0,0,0,0]) end elsif stype == :list it "should iterate across a partial row of stored indices" do vs = [] is = [] js = [] STDERR.puts("now") if stype == :yale stype_matrix[2,1..2].each_stored_with_indices do |v,i,j| vs << v is << i js << j end expect(vs).to eq([7,8]) expect(is).to eq([0,0]) expect(js).to eq([0,1]) end end unless stype == :dense it "should iterate across a row of stored indices" do #FIXME pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? vs = [] is = [] js = [] stype_matrix[2,0..2].each_stored_with_indices do |v,i,j| vs << v is << i js << j end expect(vs).to eq(stype == :yale ? [8,6,7] : [6,7,8]) expect(is).to eq([0,0,0]) expect(js).to eq(stype == :yale ? [2,0,1] : [0,1,2]) end it "should iterate across a submatrix of stored indices" do #FIXME pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? vs = [] is = [] js = [] stype_matrix[0..1,1..2].each_stored_with_indices do |v,i,j| vs << v is << i js << j end expect(vs).to eq(stype == :yale ? [4,1,2,5] : [1,2,4,5]) expect(is).to eq(stype == :yale ? [1,0,0,1] : [0,0,1,1]) expect(js).to eq(stype == :yale ? [0,0,1,1] : [0,1,0,1]) end end it "should return correct supershape" do pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? x = NMatrix.random([10,12]) y = x[0...8,5...12] expect(y.shape).to eq([8,7]) expect(y.supershape).to eq([10,12]) end it "should have #is_ref? method" do pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? 
a = stype_matrix[0..1, 0..1] b = stype_matrix.slice(0..1, 0..1) expect(stype_matrix.is_ref?).to be false expect(a.is_ref?).to be true expect(b.is_ref?).to be false end it "reference should compare with non-reference" do expect(stype_matrix.slice(1..2,0..1)).to eq(stype_matrix[1..2, 0..1]) expect(stype_matrix[1..2,0..1]).to eq(stype_matrix.slice(1..2, 0..1)) expect(stype_matrix[1..2,0..1]).to eq(stype_matrix[1..2, 0..1]) end context "with copying" do it 'should return an NMatrix' do n = stype_matrix.slice(0..1,0..1) expect(nm_eql(n, NMatrix.new([2,2], [0,1,3,4], dtype: :int32))).to be true end it 'should return a copy of 2x2 matrix to self elements' do pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? n = stype_matrix.slice(1..2,0..1) expect(n.shape).to eql([2,2]) expect(n[1,1]).to eq(stype_matrix[2,1]) n[1,1] = -9 expect(stype_matrix[2,1]).to eql(7) end it 'should return a 1x2 matrix without refs to self elements' do #FIXME pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? n = stype_matrix.slice(0,1..2) expect(n.shape).to eql([1,2]) expect(n[0]).to eq(stype_matrix[0,1]) expect(n[1]).to eq(stype_matrix[0,2]) n[0] = -9 expect(stype_matrix[0,1]).to eql(1) expect(stype_matrix[0,2]).to eql(2) end it 'should return a 2x1 matrix without refs to self elements' do pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? 
stype_matrix.extend NMatrix::YaleFunctions n = stype_matrix.slice(0..1,1) expect(n.shape).to eql([2,1]) expect(n[0]).to eq(stype_matrix[0,1]) expect(n[1]).to eq(stype_matrix[1,1]) n[0] = -9 expect(stype_matrix[0,1]).to eql(1) expect(stype_matrix[1,1]).to eql(4) end it 'should be correct slice for range 0..2 and 0...3' do expect(stype_matrix.slice(0..2,0..2)).to eq(stype_matrix.slice(0...3,0...3)) end [:dense, :list, :yale].each do |cast_type| it "should cast copied slice from #{stype.upcase} to #{cast_type.upcase}" do expect(nm_eql(stype_matrix.slice(1..2, 1..2).cast(cast_type, :int32), stype_matrix.slice(1..2,1..2))).to be true expect(nm_eql(stype_matrix.slice(0..1, 1..2).cast(cast_type, :int32), stype_matrix.slice(0..1,1..2))).to be true expect(nm_eql(stype_matrix.slice(1..2, 0..1).cast(cast_type, :int32), stype_matrix.slice(1..2,0..1))).to be true expect(nm_eql(stype_matrix.slice(0..1, 0..1).cast(cast_type, :int32), stype_matrix.slice(0..1,0..1))).to be true # Non square expect(nm_eql(stype_matrix.slice(0..2, 1..2).cast(cast_type, :int32), stype_matrix.slice(0..2,1..2))).to be true #require 'pry' #binding.pry if cast_type == :yale expect(nm_eql(stype_matrix.slice(1..2, 0..2).cast(cast_type, :int32), stype_matrix.slice(1..2,0..2))).to be true # Full expect(nm_eql(stype_matrix.slice(0..2, 0..2).cast(cast_type, :int32), stype_matrix)).to be true end end end # Yale: #context "by copy" do #it "should correctly preserve zeros" do # stype_matrix = NMatrix.new(:yale, 3, :int64) # column_slice = stype_matrix.column(2, :copy) # column_slice[0].should == 0 # column_slice[1].should == 0 # column_slice[2].should == 0 #end #end context "by reference" do it 'should return an NMatrix' do n = stype_matrix[0..1,0..1] expect(nm_eql(n, NMatrix.new([2,2], [0,1,3,4], dtype: :int32))).to be true end it 'should return a 2x2 matrix with refs to self elements' do #FIXME pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? 
# and :cast_type != :dense n = stype_matrix[1..2,0..1] expect(n.shape).to eql([2,2]) expect(n[0,0]).to eq(stype_matrix[1,0]) n[0,0] = -9 expect(stype_matrix[1,0]).to eql(-9) end it 'should return a 1x2 vector with refs to self elements' do #FIXME pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? # and :cast_type != :dense n = stype_matrix[0,1..2] expect(n.shape).to eql([1,2]) expect(n[0]).to eq(stype_matrix[0,1]) n[0] = -9 expect(stype_matrix[0,1]).to eql(-9) end it 'should return a 2x1 vector with refs to self elements' do pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? n = stype_matrix[0..1,1] expect(n.shape).to eql([2,1]) expect(n[0]).to eq(stype_matrix[0,1]) n[0] = -9 expect(stype_matrix[0,1]).to eql(-9) end it 'should slice again' do n = stype_matrix[1..2, 1..2] expect(nm_eql(n[1,0..1], NVector.new(2, [7,8], dtype: :int32).transpose)).to be true end it 'should be correct slice for range 0..2 and 0...3' do expect(stype_matrix[0..2,0..2]).to eq(stype_matrix[0...3,0...3]) end it 'should correctly handle :* slice notation' do expect(stype_matrix[:*,0]).to eq stype_matrix[0...stype_matrix.shape[0], 0] end if stype == :dense [:byte,:int8,:int16,:int32,:int64,:float32,:float64].each do |left_dtype| [:byte,:int8,:int16,:int32,:int64,:float32,:float64].each do |right_dtype| # Won't work if they're both 1-byte, due to overflow. next if [:byte,:int8].include?(left_dtype) && [:byte,:int8].include?(right_dtype) # For now, don't bother testing int-int mult. 
#next if [:int8,:int16,:int32,:int64].include?(left_dtype) && [:int8,:int16,:int32,:int64].include?(right_dtype) it "handles #{left_dtype.to_s} dot #{right_dtype.to_s} matrix multiplication" do #STDERR.puts "dtype=#{dtype.to_s}" #STDERR.puts "2" nary = if left_dtype.to_s =~ /complex/ COMPLEX_MATRIX43A_ARRAY else MATRIX43A_ARRAY end mary = if right_dtype.to_s =~ /complex/ COMPLEX_MATRIX32A_ARRAY else MATRIX32A_ARRAY end n = NMatrix.new([4,3], nary, dtype: left_dtype)[1..3,1..2] m = NMatrix.new([3,2], mary, dtype: right_dtype)[1..2,0..1] r = n.dot m expect(r.shape).to eql([3,2]) expect(r[0,0]).to eq(219.0) expect(r[0,1]).to eq(185.0) expect(r[1,0]).to eq(244.0) expect(r[1,1]).to eq(205.0) expect(r[2,0]).to eq(42.0) expect(r[2,1]).to eq(35.0) end end end context "operations" do it "correctly transposes slices" do expect(stype_matrix[0...3,0].transpose).to eq NMatrix[[0, 3, 6]] expect(stype_matrix[0...3,1].transpose).to eq NMatrix[[1, 4, 7]] expect(stype_matrix[0...3,2].transpose).to eq NMatrix[[2, 5, 8]] expect(stype_matrix[0,0...3].transpose).to eq NMatrix[[0], [1], [2]] expect(stype_matrix[1,0...3].transpose).to eq NMatrix[[3], [4], [5]] expect(stype_matrix[2,0...3].transpose).to eq NMatrix[[6], [7], [8]] expect(stype_matrix[1..2,1..2].transpose).to eq NMatrix[[4, 7], [5, 8]] end it "adds slices" do expect(NMatrix[[0,0,0]] + stype_matrix[1,0..2]).to eq NMatrix[[3, 4, 5]] end it "scalar adds to slices" do expect(stype_matrix[1,0..2]+1).to eq NMatrix[[4, 5, 6]] end it "compares slices to scalars" do #FIXME pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby? 
(stype_matrix[1, 0..2] > 2).each { |e| expect(e != 0).to be true } end it "iterates only over elements in the slice" do els = [] stype_matrix[1, 0..2].each { |e| els << e } expect(els.size).to eq 3 expect(els[0]).to eq 3 expect(els[1]).to eq 4 expect(els[2]).to eq 5 end it "iterates with index only over elements in the slice" do els = [] stype_matrix[1, 0..2].each_stored_with_indices { |a| els << a } expect(els.size).to eq 3 expect(els[0]).to eq [3, 0, 0] expect(els[1]).to eq [4, 0, 1] expect(els[2]).to eq [5, 0, 2] end end end example 'should be cleaned up by garbage collector without errors' do step "reference slice" do 1.times do n = stype_matrix[1..2,0..1] end GC.start end step "reference slice of casted-copy" do expect(stype_matrix).to eq(NMatrix.new([3,3], (0..9).to_a, dtype: :int32).cast(stype, :int32)) n = nil 1.times do m = NMatrix.new([2,2], [1,2,3,4]).cast(stype, :int32) n = m[0..1,0..1] end GC.start expect(n).to eq(NMatrix.new([2,2], [1,2,3,4]).cast(stype, :int32)) end end [:dense, :list, :yale].each do |cast_type| it "should cast a square reference-slice from #{stype.upcase} to #{cast_type.upcase}" do expect(nm_eql(stype_matrix[1..2, 1..2].cast(cast_type), stype_matrix[1..2,1..2])).to be true expect(nm_eql(stype_matrix[0..1, 1..2].cast(cast_type), stype_matrix[0..1,1..2])).to be true expect(nm_eql(stype_matrix[1..2, 0..1].cast(cast_type), stype_matrix[1..2,0..1])).to be true expect(nm_eql(stype_matrix[0..1, 0..1].cast(cast_type), stype_matrix[0..1,0..1])).to be true end it "should cast a rectangular reference-slice from #{stype.upcase} to #{cast_type.upcase}" do # Non square expect(nm_eql(stype_matrix[0..2, 1..2].cast(cast_type), stype_matrix[0..2,1..2])).to be true # FIXME: memory problem. 
expect(nm_eql(stype_matrix[1..2, 0..2].cast(cast_type), stype_matrix[1..2,0..2])).to be true # this one is fine end it "should cast a square full-matrix reference-slice from #{stype.upcase} to #{cast_type.upcase}" do expect(nm_eql(stype_matrix[0..2, 0..2].cast(cast_type), stype_matrix)).to be true end end end end end end ================================================ FILE: spec/03_nmatrix_monkeys_spec.rb ================================================ require 'spec_helper' describe NMatrix do describe "#to_a" do it "creates an Array with the same dimensions" do n = NMatrix.seq([3,2]) expect(n.to_a).to eq([[0, 1], [2, 3], [4, 5]]) end it "creates an Array with the proper element type" do n = NMatrix.seq([3,2], dtype: :float64) expect(n.to_a).to eq([[0.0, 1.0], [2.0, 3.0], [4.0, 5.0]]) end it "properly interprets list matrices" do n = NMatrix.seq([3,2], stype: :list) expect(n.to_a).to eq([[0, 1], [2, 3], [4, 5]]) end it "properly interprets yale matrices" do n = NMatrix.seq([3,2], stype: :yale) expect(n.to_a).to eq([[0, 1], [2, 3], [4, 5]]) end end end describe Array do describe "#to_nm" do # [0, 1, 2, 3, 4, 5] let(:a) {(0..5).to_a} it "uses a given shape and type" do expect(a.to_nm([3,2]).dtype).to eq :int64 expect(a.to_nm([3,2])).to eq(NMatrix.seq([3,2])) end it "guesses dtype based on first element" do a[0] = 0.0 expect(a.to_nm([3,2]).dtype).to eq :float64 end it "defaults to dtype :object if necessary" do #FIXME pending("not yet implemented for object dtype for NMatrix-JRuby") if jruby? 
a = %w(this is an array of strings) expect(a.to_nm([3,2]).dtype).to eq :object expect(a.to_nm([3,2])).to eq(NMatrix.new([3,2], a, dtype: :object)) end it "attempts to intuit the shape of the Array" do a = [[0, 1], [2, 3], [4, 5]] expect(a.to_nm).to eq(NMatrix.new([3,2], a.flatten)) expect(a.to_nm.dtype).to eq :int64 end it "creates an object Array for inconsistent dimensions" do a = [[0, 1, 2], [3], [4, 5]] expect(a.to_nm).to eq(NMatrix.new([3], a, dtype: :object)) expect(a.to_nm.dtype).to eq :object end it "intuits shape of Array into multiple dimensions" do a = [[[0], [1]], [[2], [3]], [[4], [5]]] expect(a.to_nm).to eq(NMatrix.new([3,2,1], a.flatten)) expect(a).to eq(a.to_nm.to_a) end it "is reflective with NMatrix#to_a" do a = [[0, 1, 2], [3], [4, 5]] expect(a).to eq(a.to_nm.to_a) end it "does not permanently alter the Array" do a = [[0, 1], [2, 3], [4, 5]] expect(a.to_nm).to eq(NMatrix.new([3,2], a.flatten)) expect(a).to eq([[0, 1], [2, 3], [4, 5]]) end end end ================================================ FILE: spec/blas_spec.rb ================================================ # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == blas_spec.rb # # Tests for properly exposed BLAS functions. 
#

require 'spec_helper'

describe NMatrix::BLAS do
  [:byte, :int8, :int16, :int32, :int64,
   :float32, :float64, :complex64, :complex128, :object
  ].each do |dtype|
    context dtype do
      it "exposes cblas_scal" do
        x = NMatrix.new([3, 1], [1, 2, 3], dtype: dtype)
        NMatrix::BLAS.cblas_scal(3, 2, x, 1)
        expect(x).to eq(NMatrix.new([3, 1], [2, 4, 6], dtype: dtype))
      end

      it "exposes cblas_imax" do
        u = NMatrix.new([3,1], [1, 4, 3], dtype: dtype)
        index = NMatrix::BLAS.cblas_imax(3, u, 1)
        expect(index).to eq(1)
      end
    end
  end

  [:float32, :float64, :complex64, :complex128].each do |dtype|
    context dtype do
      # This is not the same as "exposes cblas trsm", which would be for a version defined in blas.rb (which
      # would greatly simplify the calling of cblas_trsm in terms of arguments, and which would be accessible
      # as NMatrix::BLAS::trsm)
      it "exposes unfriendly cblas_trsm" do
        a = NMatrix.new(3, [4,-1.0/2, -3.0/4, -2, 2, -1.0/4, -4, -2, -1.0/2], dtype: dtype)
        b = NMatrix.new([3,1], [-1, 17, -9], dtype: dtype)
        NMatrix::BLAS::cblas_trsm(:row, :right, :lower, :transpose, :nonunit, 1, 3, 1.0, a, 3, b, 3)

        # These test results all come from actually running a matrix through BLAS. We use them to ensure that NMatrix's
        # version of these functions give similar results.
        expect(b[0]).to eq(-1.0/4)
        expect(b[1]).to eq(33.0/4)
        expect(b[2]).to eq(-13)

        NMatrix::BLAS::cblas_trsm(:row, :right, :upper, :transpose, :unit, 1, 3, 1.0, a, 3, b, 3)
        expect(b[0]).to eq(-15.0/2)
        expect(b[1]).to eq(5)
        expect(b[2]).to eq(-13)

        # NOTE(review): the sibling calls above and below pass :nonunit / :unit for the diag
        # argument; :nounit here looks like a typo. Confirm which symbols the diag-argument
        # parser accepts before changing it — the expected values below were recorded against
        # this exact call.
        NMatrix::BLAS::cblas_trsm(:row, :left, :lower, :transpose, :nounit, 3, 1, 1.0, a, 3, b, 1)
        expect(b[0]).to eq(307.0/8)
        expect(b[1]).to eq(57.0/2)
        expect(b[2]).to eq(26.0)

        NMatrix::BLAS::cblas_trsm(:row, :left, :upper, :transpose, :unit, 3, 1, 1.0, a, 3, b, 1)
        expect(b[0]).to eq(307.0/8)
        expect(b[1]).to eq(763.0/16)
        expect(b[2]).to eq(4269.0/64)
      end

      # trmm multiplies two matrices, where one of the two is required to be
      # triangular
      it "exposes cblas_trmm" do
        a = NMatrix.new([3,3], [1,1,1, 0,1,2, 0,0,-1], dtype: dtype)
        b = NMatrix.new([3,3], [1,2,3, 4,5,6, 7,8,9], dtype: dtype)

        begin
          NMatrix::BLAS.cblas_trmm(:row, :left, :upper, false, :not_unit, 3, 3, 1, a, 3, b, 3)
        rescue NotImplementedError => e
          pending e.to_s
        end

        product = NMatrix.new([3,3], [12,15,18, 18,21,24, -7,-8,-9], dtype: dtype)
        expect(b).to eq(product)
      end
    end
  end

  #should have a separate test for complex
  [:float32, :float64, :complex64, :complex128, :object].each do |dtype|
    context dtype do
      it "exposes cblas rot" do
        x = NMatrix.new([5,1], [1,2,3,4,5], dtype: dtype)
        y = NMatrix.new([5,1], [-5,-4,-3,-2,-1], dtype: dtype)
        x, y = NMatrix::BLAS::rot(x, y, 1.0/2, Math.sqrt(3)/2, -1)

        expect(x).to be_within(1e-4).of(
          NMatrix.new([5,1], [-0.3660254037844386, -0.7320508075688772, -1.098076211353316, -1.4641016151377544, -1.8301270189221928], dtype: dtype)
        )

        expect(y).to be_within(1e-4).of(
          NMatrix.new([5,1], [-6.830127018922193, -5.464101615137754, -4.098076211353316, -2.732050807568877, -1.3660254037844386], dtype: dtype)
        )
      end
    end
  end

  [:float32, :float64, :complex64, :complex128, :object].each do |dtype|
    context dtype do
      it "exposes cblas rotg" do
        pending("broken for :object") if dtype == :object
        ab = NMatrix.new([2,1], [6,-8], dtype: dtype)
        begin
          c,s = NMatrix::BLAS::rotg(ab)
        rescue NotImplementedError => e
          pending e.to_s
        end

        if [:float32, :float64].include?(dtype)
          expect(ab[0]).to be_within(1e-6).of(-10)
          expect(ab[1]).to be_within(1e-6).of(-5.0/3)
          expect(c).to be_within(1e-6).of(-3.0/5)
        else
          pending "need correct test cases"
          expect(ab[0]).to be_within(1e-6).of(10)
          expect(ab[1]).to be_within(1e-6).of(5.0/3)
          expect(c).to be_within(1e-6).of(3.0/5)
        end
        expect(s).to be_within(1e-6).of(4.0/5)
      end

      # Note: this exposes gemm, not cblas_gemm (which is the unfriendly CBLAS no-error-checking version)
      it "exposes gemm" do
        n = NMatrix.new([4,3], [14.0,9.0,3.0, 2.0,11.0,15.0, 0.0,12.0,17.0, 5.0,2.0,3.0], dtype: dtype)
        m = NMatrix.new([3,2], [12.0,25.0, 9.0,10.0, 8.0,5.0], dtype: dtype)
        #c = NMatrix.new([4,2], dtype)
        r = NMatrix::BLAS.gemm(n, m) #, c)
        #c.should equal(r) # check that both are same memory address
        expect(r).to eq(NMatrix.new([4,2], [273,455,243,235,244,205,102,160], dtype: dtype))
      end

      it "exposes gemv" do
        a = NMatrix.new([4,3], [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0], dtype: dtype)
        x = NMatrix.new([3,1], [2.0, 1.0, 0.0], dtype: dtype)
        y = NMatrix::BLAS.gemv(a, x)
        expect(y).to eq(NMatrix.new([4,1],[4.0,13.0,22.0,31.0],dtype: dtype))
      end

      it "exposes asum" do
        pending("broken for :object") if dtype == :object
        x = NMatrix.new([4,1], [-1,2,3,4], dtype: dtype)
        expect(NMatrix::BLAS.asum(x)).to eq(10)
      end

      it "exposes asum for single element" do
        if [:complex64,:complex128].include?(dtype)
          x = NMatrix.new([1], [Complex(-3,2)], dtype: dtype)
          expect(x.asum).to eq(5.0)
        else
          x = NMatrix.new([1], [-1], dtype: dtype)
          expect(x.asum).to eq(1.0)
        end
      end

      it "exposes nrm2" do
        pending("broken for :object") if dtype == :object

        if dtype =~ /complex/
          x = NMatrix.new([3,1], [Complex(1,2),Complex(3,4),Complex(0,6)], dtype: dtype)
          y = NMatrix.new([3,1], [Complex(0,0),Complex(0,0),Complex(0,0)], dtype: dtype)
          nrm2 = 8.12403840463596
        else
          x = NMatrix.new([4,1], [2,-4,3,5], dtype: dtype)
          y = NMatrix.new([3,1], [0,0,0], dtype: dtype)
          nrm2 = 5.385164807134504
        end

        err = case dtype
                when :float32, :complex64
                  1e-6
                when :float64, :complex128
                  1e-14
                else
                  1e-14
              end

        expect(NMatrix::BLAS.nrm2(x, 1, 3)).to be_within(err).of(nrm2)
        expect(NMatrix::BLAS.nrm2(y, 1, 3)).to be_within(err).of(0)
      end
    end
  end
end

================================================
FILE: spec/elementwise_spec.rb
================================================
# = NMatrix
#
# A linear algebra library for scientific computation in Ruby.
# NMatrix is part of SciRuby.
#
# NMatrix was originally inspired by and derived from NArray, by
# Masahiro Tanaka: http://narray.rubyforge.org
#
# == Copyright Information
#
# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
#
# Please see LICENSE.txt for additional copyright notices.
#
# == Contributing
#
# By contributing source code to SciRuby, you agree to be bound by
# our Contributor Agreement:
#
# * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
#
# == nmatrix_spec.rb
#
# Element-wise operation tests.
#

require 'spec_helper'

describe NMatrix do
  context "yale" do
    before :each do
      @n = NMatrix.new(3, stype: :yale, dtype: :int64)
      @n.extend NMatrix::YaleFunctions unless jruby?
      @m = NMatrix.new(3, stype: :yale, dtype: :int64)
      @n[0,0] = 52
      @n[0,2] = 5
      @n[1,1] = 40
      @n[0,1] = 30
      @n[2,0] = 6
      @m[1,1] = -48
      @m[0,2] = -5
      # NOTE(review): @n was already extended with NMatrix::YaleFunctions at the top of this
      # hook — this second extend is redundant.
      @n.extend NMatrix::YaleFunctions unless jruby?
    end

    it "should perform scalar math" do
      pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby?
      x = @n * 3
      expect(x[0,0]).to eq(52 * 3)
      expect(x[0,1]).to eq(30 * 3)
      expect(x[0,2]).to eq(5 * 3)
      expect(x[1,1]).to eq(40 * 3)
      expect(x[2,0]).to eq(6 * 3)

      r = NMatrix.new(3, stype: :yale, dtype: :int64)
      y = r + 3
      expect(y[0,0]).to eq(3)
    end

    it "should refuse to perform a dot operation on a yale with non-zero default" do
      pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby?
      r = NMatrix.new(3, stype: :yale, dtype: :int64)
      y = r + 3
      # NOTE(review): bare raise_error (no expected error class) — RSpec warns this can mask
      # unrelated failures; consider pinning the expected exception class.
      expect { y.dot(r) }.to raise_error
      expect { r.dot(y) }.to raise_error
    end

    it "should perform element-wise addition" do
      expect(@n+@m).to eq(NMatrix.new(:dense, 3, [52,30,0,0,-8,0,6,0,0], :int64).cast(:yale, :int64))
    end

    it "should perform element-wise subtraction" do
      expect(@n-@m).to eq(NMatrix.new(:dense, 3, [52,30,10,0,88,0,6,0,0], :int64).cast(:yale, :int64))
    end

    it "should perform element-wise multiplication" do
      r = NMatrix.new(:dense, 3, [0,0,-25,0,-1920,0,0,0,0], :int64).cast(:yale, :int64)
      # NOTE(review): `m` is assigned but never used in this example (the expectation below
      # uses @m from the before-hook).
      m = NMatrix.new(2, stype: :yale, dtype: :int64)
      expect(@n*@m).to eq(r)
    end

    it "should perform element-wise division" do
      pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby?
      r = NMatrix.new(:dense, 3, [52, 30, -2, 0, -1, 0, 6, 0, 0], :int64).cast(:yale, :int64)
      expect(@n/(@m+1)).to eq(r)
    end

    it "should perform element-wise modulo" do
      pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby?
      m = NMatrix.new(3, stype: :yale, dtype: :int64, default: 0) + 5
      expect(@n % m).to eq(NMatrix.new(:dense, 3, [2,0,0,0,0,0,1,0,0], :int64).cast(:yale, :int64))
    end

    it "should handle element-wise equality (=~)" do
      expect(@n =~ @m).to eq(NMatrix.new(:dense, 3, [false,false,false,true,false,true,false,true,true], :object).cast(:yale, :object, false))
    end

    it "should handle element-wise inequality (!~)" do
      expect(@n !~ @m).to eq(NMatrix.new(:dense, 3, [true,true,true,false,true,false,true,false,false], :object).cast(:yale, :object, true))
    end

    it "should handle element-wise less-than (<)" do
      expect(@m < @n).to eq(NMatrix.new(:dense, 3, [true,true,true,false,true,false,true,false,false], :object).cast(:yale, :object, true))
    end

    it "should handle element-wise greater-than (>)" do
      expect(@n > @m).to eq(NMatrix.new(:dense, 3, [true,true,true,false,true,false,true,false,false], :object).cast(:yale, :object, false))
    end

    it "should handle element-wise greater-than-or-equals (>=)" do
      expect(@n >= @m).to eq(NMatrix.new(:dense, 3, true, :object).cast(:yale,:object, true))
    end

    it "should handle element-wise less-than-or-equals (<=)" do
      r = NMatrix.new(:dense, 3, [false,false,false,true,false,true,false,true,true], :object).cast(:yale, :object, false)
      expect(@n <= @m).to eq(r)
    end
  end

  context "list" do
    before :each do
      @n = NMatrix.new(:list, 2, 0, :int64)
      @m = NMatrix.new(:list, 2, 0, :int64)
      @n[0,0] = 52
      @m[1,1] = -48
      @n[1,1] = 40
    end

    it "should perform scalar math" do
      pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby?
      x = @n * 3
      expect(x[0,0]).to eq(52 * 3)
      expect(x[1,1]).to eq(40 * 3)
      expect(x[0,1]).to eq(0)

      r = NMatrix.new(3, stype: :list, default: 1)
      y = r + 3
      expect(y[0,0]).to eq(4)
    end

    it "should perform element-wise addition" do
      r = NMatrix.new(2, stype: :list, dtype: :int64, default: 0)
      r[0,0] = 52
      r[1,1] = -8
      q = @n + @m
      expect(q).to eq(r)
    end

    it "should perform element-wise subtraction" do
      r = NMatrix.new(:dense, 2, [52, 0, 0, 88], :int64).cast(:list, :int64)
      expect(@n-@m).to eq(r)
    end

    it "should perform element-wise multiplication" do
      r = NMatrix.new(:dense, 2, [52, 0, 0, -1920], :int64).cast(:list, :int64)
      m = NMatrix.new(:list, 2, 1, :int64)
      m[1,1] = -48
      expect(@n*m).to eq(r)
    end

    it "should perform element-wise division" do
      m = NMatrix.new(:list, 2, 1, :int64)
      m[1,1] = 2
      r = NMatrix.new(:dense, 2, [52, 0, 0, 20], :int64).cast(:list, :int64)
      expect(@n/m).to eq(r)
    end

    it "should perform element-wise modulo" do
      pending("not yet implemented for sparse matrices for NMatrix-JRuby") if jruby?
      m = NMatrix.new(:list, 2, 1, :int64)
      m[0,0] = 50
      m[1,1] = 40
      # NOTE(review): no expectation here — as written, this example only verifies that
      # % does not raise.
      (@n % m)
    end

    it "should handle element-wise equality (=~)" do
      r = NMatrix.new(:list, 2, false, :object)
      r[0,1] = true
      r[1,0] = true
      expect(@n =~ @m).to eq(r)
    end

    it "should handle element-wise inequality (!~)" do
      r = NMatrix.new(:list, 2, false, :object)
      r[0,0] = true
      r[1,1] = true
      expect(@n !~ @m).to eq(r)
    end

    it "should handle element-wise less-than (<)" do
      expect(@n < @m).to eq(NMatrix.new(:list, 2, false, :object))
    end

    it "should handle element-wise greater-than (>)" do
      r = NMatrix.new(:list, 2, false, :object)
      r[0,0] = true
      r[1,1] = true
      expect(@n > @m).to eq(r)
    end

    it "should handle element-wise greater-than-or-equals (>=)" do
      expect(@n >= @m).to eq(NMatrix.new(:list, 2, true, :object))
    end

    it "should handle element-wise less-than-or-equals (<=)" do
      r = NMatrix.new(:list, 2, false, :object)
      r[0,1] = true
      r[1,0] = true
      expect(@n <= @m).to eq(r)
    end
  end

  context "dense" do
    context "scalar arithmetic" do
      before :each do
        @n = NMatrix.new(:dense, 2, [1,2,3,4], :int64)
      end

      it "works for integers" do
        expect(@n+1).to eq(NMatrix.new(:dense, 2, [2,3,4,5], :int64))
      end

      #it "works for complex64" do
      #  n = @n.cast(:dtype => :complex64)
      #  (n + 10.0).to_a.should == [Complex(11.0), Complex(12.0), Complex(13.0), Complex(14.0)]
      #end
    end

    context "elementwise arithmetic" do
      before :each do
        @n = NMatrix.new(:dense, 2, [1,2,3,4], :int64)
        @m = NMatrix.new(:dense, 2, [-4,-1,0,66], :int64)
      end

      it "adds" do
        r = @n+@m
        expect(r).to eq(NMatrix.new(:dense, [2,2], [-3, 1, 3, 70], :int64))
      end

      it "subtracts" do
        r = @n-@m
        expect(r).to eq(NMatrix.new(:dense, [2,2], [5, 3, 3, -62], :int64))
      end

      it "multiplies" do
        r = @n*@m
        expect(r).to eq(NMatrix.new(:dense, [2,2], [-4, -2, 0, 264], :int64))
      end

      it "divides in the Ruby way" do
        pending("not yet implemented int dtype for NMatrix-JRuby") if jruby?
        m = @m.clone
        m[1,0] = 3
        r = @n/m
        expect(r).to eq(NMatrix.new(:dense, [2,2], [-1, -2, 1, 0], :int64))
      end

      it "exponentiates" do
        r = @n ** 2 # TODO: We might have problems with the dtype.
        expect(r).to eq(NMatrix.new(:dense, [2,2], [1, 4, 9, 16], :int64))
      end

      it "modulo" do
        pending("not yet implemented int dtype for NMatrix-JRuby") if jruby?
        expect(@n % (@m + 2)).to eq(NMatrix.new(:dense, [2,2], [-1, 0, 1, 4], :int64))
      end
    end

    context "elementwise comparisons" do
      before :each do
        @n = NMatrix.new(:dense, 2, [1,2,3,4], :int64)
        @m = NMatrix.new(:dense, 2, [-4,-1,3,2], :int64)
      end

      it "equals" do
        r = @n =~ @m
        expect(r).to eq(NMatrix.new(:dense, [2,2], [false, false, true, false], :object))
      end

      it "is not equal" do
        r = @n !~ @m
        expect(r).to eq(NMatrix.new(:dense, [2,2], [true, true, false, true], :object))
      end

      it "is less than" do
        r = @n < @m
        expect(r).to eq(NMatrix.new(:dense, [2,2], false, :object))
      end

      it "is greater than" do
        r = @n > @m
        expect(r).to eq(NMatrix.new(:dense, [2,2], [true, true, false, true], :object))
      end

      it "is less than or equal to" do
        r = @n <= @m
        expect(r).to eq(NMatrix.new(:dense, [2,2], [false, false, true, false], :object))
      end

      it "is greater than or equal to" do
        n = NMatrix.new(:dense, [2,2], [1, 2, 2, 4], :int64)
        r = n >= @m
        expect(r).to eq(NMatrix.new(:dense, [2,2], [true, true, false, true], :object))
      end
    end
  end
end

================================================
FILE: spec/homogeneous_spec.rb
================================================
# = NMatrix
#
# A linear algebra library for scientific computation in Ruby.
# NMatrix is part of SciRuby.
#
# NMatrix was originally inspired by and derived from NArray, by
# Masahiro Tanaka: http://narray.rubyforge.org
#
# == Copyright Information
#
# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
#
# Please see LICENSE.txt for additional copyright notices.
# # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == homogeneous_spec.rb # # Specs for the homogeneous transformation matrix methods. # require 'spec_helper' require "./lib/nmatrix/homogeneous.rb" require 'pry' describe 'NMatrix' do context ".x_rotation" do it "should generate a matrix representing a rotation about the x axis" do x = NMatrix.x_rotation(Math::PI/6) expect(x).to be_within(1e-8).of(NMatrix.new([4,4], [1.0, 0.0, 0.0, 0.0, 0.0, Math.cos(Math::PI/6), -0.5, 0.0, 0.0, 0.5, Math.cos(Math::PI/6), 0.0, 0.0, 0.0, 0.0, 1.0] )) end end context ".y_rotation" do it "should generate a matrix representing a rotation about the y axis" do y = NMatrix.y_rotation(Math::PI/6) expect(y).to be_within(1e-8).of(NMatrix.new([4,4], [Math.cos(Math::PI/6), 0.0, 0.5, 0.0, 0.0, 1.0, 0.0, 0.0, -0.5, 0.0, Math.cos(Math::PI/6), 0.0, 0.0, 0.0, 0.0, 1.0] )) end end context ".z_rotation" do it "should generate a matrix representing a rotation about the z axis" do z = NMatrix.z_rotation(Math::PI/6) expect(z).to be_within(1e-8).of(NMatrix.new([4,4], [Math.cos(Math::PI/6), -0.5, 0.0, 0.0, 0.5, Math.cos(Math::PI/6), 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0] )) end end context ".translation" do it "should generate a translation matrix from an Array" do t = NMatrix.translation([4,5,6]) expect(t).to be_within(1e-8).of(NMatrix.new([4,4], [1, 0, 0, 4, 0, 1, 0, 5, 0, 0, 1, 6, 0, 0, 0, 1] )) end it "should generate a translation matrix from x, y, and z values" do t = NMatrix.translation(4,5,6) expect(t).to be_within(1e-8).of(NMatrix.new([4,4], [1, 0, 0, 4, 0, 1, 0, 5, 0, 0, 1, 6, 0, 0, 0, 1] )) end it "should generate a translation matrix from an NMatrix with correctly inferred dtype" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
t = NMatrix.translation(NMatrix.new([3,1], [4,5,6], dtype: :float64) ) expect(t).to be_within(1e-8).of(NMatrix.new([4,4], [1, 0, 0, 4, 0, 1, 0, 5, 0, 0, 1, 6, 0, 0, 0, 1] )) expect(t.dtype).to be(:float64) end end context "#quaternion" do it "should generate a singularity-free quaternion" do transform = NMatrix.new([4,4], [-0.9995825,-0.02527934,-0.0139845,50.61761,-0.02732551,0.9844284,0.1736463,-22.95566,0.009376526,0.1739562,-0.9847089,7.1521,0,0,0,1]) q = transform.quaternion expect(Math.sqrt(q[0]**2 + q[1]**2 + q[2]**2 + q[3]**2)).to be_within(1e-6).of(1.0) end end end ================================================ FILE: spec/io/fortran_format_spec.rb ================================================ # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == fortran_format_spec.rb # # Basic tests for NMatrix::IO::FortranFormat. 
# require './lib/nmatrix' describe NMatrix::IO::FortranFormat do it "parses integer FORTRAN formats" do int_fmt = NMatrix::IO::FortranFormat::Reader.new('(16I5)').parse expect(int_fmt[:format_code]).to eq "INT_ID" expect(int_fmt[:repeat]) .to eq 16 expect(int_fmt[:field_width]).to eq 5 int_fmt = NMatrix::IO::FortranFormat::Reader.new('(I4)').parse expect(int_fmt[:format_code]).to eq "INT_ID" expect(int_fmt[:field_width]).to eq 4 end it "parses floating point FORTRAN formats" do fp_fmt = NMatrix::IO::FortranFormat::Reader.new('(10F7.1)').parse expect(fp_fmt[:format_code]) .to eq "FP_ID" expect(fp_fmt[:repeat]) .to eq 10 expect(fp_fmt[:field_width]) .to eq 7 expect(fp_fmt[:post_decimal_width]).to eq 1 fp_fmt = NMatrix::IO::FortranFormat::Reader.new('(F4.2)').parse expect(fp_fmt[:format_code]) .to eq "FP_ID" expect(fp_fmt[:field_width]) .to eq 4 expect(fp_fmt[:post_decimal_width]).to eq 2 end it "parses exponential FORTRAN formats" do exp_fmt = NMatrix::IO::FortranFormat::Reader.new('(2E8.3E3)').parse expect(exp_fmt[:format_code]) .to eq "EXP_ID" expect(exp_fmt[:repeat]) .to eq 2 expect(exp_fmt[:field_width]) .to eq 8 expect(exp_fmt[:post_decimal_width]).to eq 3 expect(exp_fmt[:exponent_width]) .to eq 3 exp_fmt = NMatrix::IO::FortranFormat::Reader.new('(3E3.6)').parse expect(exp_fmt[:format_code]) .to eq "EXP_ID" expect(exp_fmt[:repeat]) .to eq 3 expect(exp_fmt[:field_width]) .to eq 3 expect(exp_fmt[:post_decimal_width]).to eq 6 exp_fmt = NMatrix::IO::FortranFormat::Reader.new('(E4.5)').parse expect(exp_fmt[:format_code]) .to eq "EXP_ID" expect(exp_fmt[:field_width]) .to eq 4 expect(exp_fmt[:post_decimal_width]).to eq 5 end ['I3', '(F4)', '(E3.', '(E4.E5)'].each do |bad_format| it "doesn't let bad input through : #{bad_format}" do expect { NMatrix::IO::FortranFormat::Reader.new(bad_format).parse }.to raise_error(IOError) end end end ================================================ FILE: spec/io/harwell_boeing_spec.rb ================================================ # 
= NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == io_spec.rb # # Basic tests for NMatrix::IO::HarwelBoeing. # TODO : After the fortran format thing is done require 'spec_helper' require "./lib/nmatrix" describe NMatrix::IO::HarwellBoeing do def check_file_header header expect(header[:title]) .to eq("Title") expect(header[:key]) .to eq("Key") expect(header[:totcrd]) .to eq(5) expect(header[:ptrcrd]) .to eq(1) expect(header[:indcrd]) .to eq(1) expect(header[:valcrd]) .to eq(3) expect(header[:rhscrd]) .to eq(0) expect(header[:mxtype]) .to eq('RUA') expect(header[:nrow]) .to eq(5) expect(header[:ncol]) .to eq(5) expect(header[:nnzero]) .to eq(13) expect(header[:neltvl]) .to eq(0) expect(header[:ptrfmt]) .to eq({ format_code: "INT_ID", repeat: 6, field_width: 3 }) expect(header[:indfmt]) .to eq({ format_code: "INT_ID", repeat: 13, field_width: 3 }) expect(header[:valfmt]) .to eq({ format_code: "EXP_ID", repeat: 5, field_width: 15, post_decimal_width: 8 }) expect(header[:rhsfmt]) .to eq({ format_code: "EXP_ID", repeat: 5, field_width: 15, post_decimal_width: 8 }) end it "loads a Harwell Boeing file values and header (currently real only)" do n, h = NMatrix::IO::HarwellBoeing.load("spec/io/test.rua") expect(n.is_a? NMatrix).to eq(true) expect(n.cols) .to eq(5) expect(n.rows) .to eq(5) expect(n[0,0]) .to eq(11) expect(n[4,4]) .to eq(55) expect(h.is_a? 
Hash).to eq(true) check_file_header(h) end it "loads only the header of the file when specified" do h = NMatrix::IO::HarwellBoeing.load("spec/io/test.rua", header: true) expect(h.is_a? Hash).to eq(true) check_file_header(h) end it "raises error for wrong Harwell Boeing file name" do expect{ NMatrix::IO::HarwellBoeing.load("spec/io/wrong.afx") }.to raise_error(IOError) end end ================================================ FILE: spec/io/test.rua ================================================ Title Key 5 1 1 3 0 RUA 5 5 13 0 (6I3) (13I3) (5E15.8) (5E15.8) 1 4 7 8 11 14 1 3 5 2 3 5 3 1 3 4 3 4 5 11.0 31.0 51.0 22.0 32.0 52.0 33.0 14.0 34.0 44.0 35.0 45.0 55.0 ================================================ FILE: spec/io_spec.rb ================================================ # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == io_spec.rb # # Basic tests for NMatrix::IO. # require "tmpdir" # Used to avoid cluttering the repository. require 'spec_helper' require "./lib/nmatrix" describe NMatrix::IO do let(:tmp_dir) { Dir.mktmpdir } let(:test_out) { File.join(tmp_dir, 'test-out') } it "repacks a string" do pending("not yet implemented for NMatrix-JRuby") if jruby? expect(NMatrix::IO::Matlab.repack("hello", :miUINT8, :byte)).to eq("hello") end it "creates yale from internal byte-string function" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
ia = NMatrix::IO::Matlab.repack("\0\1\3\3\4", :miUINT8, :itype) ja = NMatrix::IO::Matlab.repack("\0\1\3\0\0\0\0\0\0\0\0", :miUINT8, :itype) n = NMatrix.new(:yale, [4,4], :byte, ia, ja, "\2\3\5\4", :byte) expect(n[0,0]).to eq(2) expect(n[1,1]).to eq(3) expect(n[1,3]).to eq(5) expect(n[3,0]).to eq(4) expect(n[2,2]).to eq(0) expect(n[3,3]).to eq(0) end it "reads MATLAB .mat file containing a single square sparse matrix" do pending("not yet implemented for NMatrix-JRuby") if jruby? n = NMatrix::IO::Matlab.load_mat("spec/4x4_sparse.mat") expect(n[0,0]).to eq(2) expect(n[1,1]).to eq(3) expect(n[1,3]).to eq(5) expect(n[3,0]).to eq(4) expect(n[2,2]).to eq(0) expect(n[3,3]).to eq(0) end it "reads MATLAB .mat file containing a single dense integer matrix" do n = NMatrix::IO::Matlab.load_mat("spec/4x5_dense.mat") m = NMatrix.new([4,5], [16,17,18,19,20,15,14,13,12,11,6,7,8,9,10,5,4,3,2,1]) expect(n).to eq(m) end it "reads MATLAB .mat file containing a single dense double matrix" do n = NMatrix::IO::Matlab.load_mat("spec/2x2_dense_double.mat") m = NMatrix.new(2, [1.1, 2.0, 3.0, 4.0], dtype: :float64) expect(n).to eq(m) end it "loads and saves MatrixMarket .mtx file containing a single large sparse double matrix" do pending "spec disabled because it's so slow" n = NMatrix::IO::Market.load("spec/utm5940.mtx") NMatrix::IO::Market.save(n, "spec/utm5940.saved.mtx") expect(`wc -l spec/utm5940.mtx`.split[0]).to eq(`wc -l spec/utm5940.saved.mtx`.split[0]) end it "loads a Point Cloud Library PCD file" do pending("not yet implemented for NMatrix-JRuby") if jruby? n = NMatrix::IO::PointCloud.load("spec/test.pcd") expect(n.column(0).sort.uniq.size).to eq(1) expect(n.column(0).sort.uniq.first).to eq(207.008) expect(n[0,3]).to eq(0) end it "raises an error when reading a non-existent file" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
fn = rand(10000000).to_i.to_s
# Keep generating random names until we find one that doesn't exist on disk.
while File.exist?(fn)
  fn = rand(10000000).to_i.to_s
end
expect{ NMatrix.read(fn) }.to raise_error(Errno::ENOENT)
end

it "reads and writes NMatrix dense" do
  pending("not yet implemented for NMatrix-JRuby") if jruby?
  n = NMatrix.new(:dense, [4,3], [0,1,2,3,4,5,6,7,8,9,10,11], :int32)
  n.write(test_out)
  m = NMatrix.read(test_out)
  expect(n).to eq(m)
end

it "reads and writes NMatrix dense as symmetric" do
  pending("not yet implemented for NMatrix-JRuby") if jruby?
  n = NMatrix.new(:dense, 3, [0,1,2,1,3,4,2,4,5], :int16)
  n.write(test_out, :symmetric)
  m = NMatrix.read(test_out)
  expect(n).to eq(m)
end

it "reads and writes NMatrix dense as skew" do
  pending("not yet implemented for NMatrix-JRuby") if jruby?
  n = NMatrix.new(:dense, 3, [0,1,2,-1,3,4,-2,-4,5], :float64)
  n.write(test_out, :skew)
  m = NMatrix.read(test_out)
  expect(n).to eq(m)
end

it "reads and writes NMatrix dense as hermitian" do
  pending("not yet implemented for NMatrix-JRuby") if jruby?
  n = NMatrix.new(:dense, 3, [0,1,2,1,3,4,2,4,5], :complex64)
  n.write(test_out, :hermitian)
  m = NMatrix.read(test_out)
  expect(n).to eq(m)
end

it "reads and writes NMatrix dense as upper" do
  pending("not yet implemented for NMatrix-JRuby") if jruby?
  n = NMatrix.new(:dense, 3, [-1,1,2,3,4,5,6,7,8], :int32)
  n.write(test_out, :upper)
  m = NMatrix.new(:dense, 3, [-1,1,2,0,4,5,0,0,8], :int32) # upper-triangular version of the same (sub-diagonal entries zeroed)
  o = NMatrix.read(test_out)
  # Writing as :upper discards the lower triangle, so the reloaded matrix
  # matches m, not the original n.
  expect(o).to eq(m)
  expect(o).not_to eq(n)
end

it "reads and writes NMatrix dense as lower" do
  pending("not yet implemented for NMatrix-JRuby") if jruby?
n = NMatrix.new(:dense, 3, [-1,1,2,3,4,5,6,7,8], :int32)
n.write(test_out, :lower)
m = NMatrix.new(:dense, 3, [-1,0,0,3,4,0,6,7,8], :int32) # lower-triangular version of the same (super-diagonal entries zeroed)
o = NMatrix.read(test_out)
# Writing as :lower discards the upper triangle, so the reloaded matrix
# matches m, not the original n.
expect(o).to eq(m)
expect(o).not_to eq(n)
end
end
================================================
FILE: spec/lapack_core_spec.rb
================================================
# = NMatrix
#
# A linear algebra library for scientific computation in Ruby.
# NMatrix is part of SciRuby.
#
# NMatrix was originally inspired by and derived from NArray, by
# Masahiro Tanaka: http://narray.rubyforge.org
#
# == Copyright Information
#
# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
#
# Please see LICENSE.txt for additional copyright notices.
#
# == Contributing
#
# By contributing source code to SciRuby, you agree to be bound by
# our Contributor Agreement:
#
# * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
#
# == lapack_core_spec.rb
#
# Tests for LAPACK functions that have internal implementations (i.e. they
# don't rely on external libraries) and also functions that are implemented
# by both nmatrix-atlas and nmatrix-lapacke. These tests will also be run for the
# plugins that do use external libraries, since they will override the
# internal implementations.
#
require 'spec_helper'

describe "NMatrix::LAPACK functions with internal implementations" do
  # where integer math is allowed
  [:byte, :int8, :int16, :int32, :int64, :float32, :float64, :complex64, :complex128].each do |dtype|
    context dtype do
      # This spec seems a little weird. It looks like laswp ignores the last
      # element of piv, though maybe I misunderstand smth.
It would make # more sense if piv were [2,1,3,3] it "exposes clapack laswp" do a = NMatrix.new(:dense, [3,4], [1,2,3,4,5,6,7,8,9,10,11,12], dtype) NMatrix::LAPACK::clapack_laswp(3, a, 4, 0, 3, [2,1,3,0], 1) b = NMatrix.new(:dense, [3,4], [3,2,4,1,7,6,8,5,11,10,12,9], dtype) expect(a).to eq(b) end # This spec is OK, because the default behavior for permute_columns # is :intuitive, which is different from :lapack (default laswp behavior) it "exposes NMatrix#permute_columns and #permute_columns! (user-friendly laswp)" do a = NMatrix.new(:dense, [3,4], [1,2,3,4,5,6,7,8,9,10,11,12], dtype) b = NMatrix.new(:dense, [3,4], [3,2,4,1,7,6,8,5,11,10,12,9], dtype) piv = [2,1,3,0] r = a.permute_columns(piv) expect(r).not_to eq(a) expect(r).to eq(b) a.permute_columns!(piv) expect(a).to eq(b) end end end # where integer math is not allowed [:float32, :float64, :complex64, :complex128].each do |dtype| context dtype do # clapack_getrf performs a LU decomposition, but unlike the # standard LAPACK getrf, it's the upper matrix that has unit diagonals # and the permutation is done in columns not rows. See the code for # details. 
# Also the rows in the pivot vector are indexed starting from 0, # rather than 1 as in LAPACK it "calculates LU decomposition using clapack_getrf (row-major, square)" do a = NMatrix.new(3, [4,9,2,3,5,7,8,1,6], dtype: dtype) ipiv = NMatrix::LAPACK::clapack_getrf(:row, a.shape[0], a.shape[1], a, a.shape[1]) b = NMatrix.new(3,[9, 2.0/9, 4.0/9, 5, 53.0/9, 7.0/53, 1, 52.0/9, 360.0/53], dtype: dtype) ipiv_true = [1,2,2] # delta varies for different dtypes err = case dtype when :float32, :complex64 1e-6 when :float64, :complex128 1e-15 end expect(a).to be_within(err).of(b) expect(ipiv).to eq(ipiv_true) end it "calculates LU decomposition using clapack_getrf (row-major, rectangular)" do a = NMatrix.new([3,4], GETRF_EXAMPLE_ARRAY, dtype: dtype) ipiv = NMatrix::LAPACK::clapack_getrf(:row, a.shape[0], a.shape[1], a, a.shape[1]) #we can't use GETRF_SOLUTION_ARRAY here, because of the different #conventions of clapack_getrf b = NMatrix.new([3,4],[10.0, -0.1, 0.0, 0.4, 3.0, 9.3, 20.0/93, 38.0/93, 1.0, 7.1, 602.0/93, 251.0/602], dtype: dtype) ipiv_true = [2,2,2] # delta varies for different dtypes err = case dtype when :float32, :complex64 1e-6 when :float64, :complex128 1e-15 end expect(a).to be_within(err).of(b) expect(ipiv).to eq(ipiv_true) end #Normally we wouldn't check column-major routines, since all our matrices #are row-major, but we use the column-major version in #getrf!, so we #want to test it here. it "calculates LU decomposition using clapack_getrf (col-major, rectangular)" do #this is supposed to represent the 3x2 matrix # -1 2 # 0 3 # 1 -2 a = NMatrix.new([1,6], [-1,0,1,2,3,-2], dtype: dtype) ipiv = NMatrix::LAPACK::clapack_getrf(:col, 3, 2, a, 3) b = NMatrix.new([1,6], [-1,0,-1,2,3,0], dtype: dtype) ipiv_true = [0,1] # delta varies for different dtypes err = case dtype when :float32, :complex64 1e-6 when :float64, :complex128 1e-15 end expect(a).to be_within(err).of(b) expect(ipiv).to eq(ipiv_true) end it "calculates LU decomposition using #getrf! 
(rectangular)" do a = NMatrix.new([3,4], GETRF_EXAMPLE_ARRAY, dtype: dtype) ipiv = a.getrf! b = NMatrix.new([3,4], GETRF_SOLUTION_ARRAY, dtype: dtype) ipiv_true = [2,3,3] # delta varies for different dtypes err = case dtype when :float32, :complex64 1e-6 when :float64, :complex128 1e-14 end expect(a).to be_within(err).of(b) expect(ipiv).to eq(ipiv_true) end it "calculates LU decomposition using #getrf! (square)" do a = NMatrix.new([4,4], [0,1,2,3, 1,1,1,1, 0,-1,-2,0, 0,2,0,2], dtype: dtype) ipiv = a.getrf! b = NMatrix.new([4,4], [1,1,1,1, 0,2,0,2, 0,-0.5,-2,1, 0,0.5,-1,3], dtype: dtype) ipiv_true = [2,4,3,4] expect(a).to eq(b) expect(ipiv).to eq(ipiv_true) end # Together, these calls are basically xGESV from LAPACK: http://www.netlib.org/lapack/double/dgesv.f it "exposes clapack_getrs" do a = NMatrix.new(3, [-2,4,-3, 3,-2,1, 0,-4,3], dtype: dtype) ipiv = NMatrix::LAPACK::clapack_getrf(:row, 3, 3, a, 3) b = NMatrix.new([3,1], [-1, 17, -9], dtype: dtype) NMatrix::LAPACK::clapack_getrs(:row, false, 3, 1, a, 3, ipiv, b, 3) expect(b[0]).to eq(5) expect(b[1]).to eq(-15.0/2) expect(b[2]).to eq(-13) end it "solves matrix equation (non-vector rhs) using clapack_getrs" do a = NMatrix.new(3, [-2,4,-3, 3,-2,1, 0,-4,3], dtype: dtype) b = NMatrix.new([3,2], [-1,2, 17,1, -9,-4], dtype: dtype) n = a.shape[0] nrhs = b.shape[1] ipiv = NMatrix::LAPACK::clapack_getrf(:row, n, n, a, n) # Even though we pass :row to clapack_getrs, it still interprets b as # column-major, so need to transpose b before and after: b = b.transpose NMatrix::LAPACK::clapack_getrs(:row, false, n, nrhs, a, n, ipiv, b, n) b = b.transpose b_true = NMatrix.new([3,2], [5,1, -7.5,1, -13,0], dtype: dtype) expect(b).to eq(b_true) end #posv is like potrf+potrs #posv is implemented in both nmatrix-atlas and nmatrix-lapacke, so the spec #needs to be shared here it "solves a (symmetric positive-definite) matrix equation using posv (vector rhs)" do a = NMatrix.new(3, [4, 0,-1, 0, 2, 1, 0, 0, 1], dtype: dtype) b = 
NMatrix.new([3,1], [4,2,0], dtype: dtype) begin x = NMatrix::LAPACK::posv(:upper, a, b) rescue NotImplementedError => e pending e.to_s end x_true = NMatrix.new([3,1], [1, 1, 0], dtype: dtype) err = case dtype when :float32, :complex64 1e-5 when :float64, :complex128 1e-14 end expect(x).to be_within(err).of(x_true) end it "solves a (symmetric positive-definite) matrix equation using posv (non-vector rhs)" do a = NMatrix.new(3, [4, 0,-1, 0, 2, 1, 0, 0, 1], dtype: dtype) b = NMatrix.new([3,2], [4,-1, 2,-1, 0,0], dtype: dtype) begin x = NMatrix::LAPACK::posv(:upper, a, b) rescue NotImplementedError => e pending e.to_s end x_true = NMatrix.new([3,2], [1,0, 1,-1, 0,1], dtype: dtype) err = case dtype when :float32, :complex64 1e-5 when :float64, :complex128 1e-14 end expect(x).to be_within(err).of(x_true) end it "calculates the singular value decomposition with NMatrix#gesvd" do #example from Wikipedia m = 4 n = 5 mn_min = [m,n].min a = NMatrix.new([m,n],[1,0,0,0,2, 0,0,3,0,0, 0,0,0,0,0, 0,4,0,0,0], dtype: dtype) begin u, s, vt = a.gesvd rescue NotImplementedError => e pending e.to_s end s_true = NMatrix.new([mn_min,1], [4,3,Math.sqrt(5),0], dtype: a.abs_dtype) u_true = NMatrix.new([m,m], [0,0,1,0, 0,1,0,0, 0,0,0,-1, 1,0,0,0], dtype: dtype) vt_true = NMatrix.new([n,n], [0,1,0,0,0, 0,0,1,0,0, Math.sqrt(0.2),0,0,0,Math.sqrt(0.8), 0,0,0,1,0, -Math.sqrt(0.8),0,0,0,Math.sqrt(0.2)], dtype: dtype) err = case dtype when :float32, :complex64 1e-5 when :float64, :complex128 1e-14 end expect(s).to be_within(err).of(s_true) expect(u).to be_within(err).of(u_true) expect(vt).to be_within(err).of(vt_true) expect(s.dtype).to eq(a.abs_dtype) expect(u.dtype).to eq(dtype) expect(vt.dtype).to eq(dtype) end it "calculates the singular value decomposition with NMatrix#gesdd" do #example from Wikipedia m = 4 n = 5 mn_min = [m,n].min a = NMatrix.new([m,n],[1,0,0,0,2, 0,0,3,0,0, 0,0,0,0,0, 0,4,0,0,0], dtype: dtype) begin u, s, vt = a.gesdd rescue NotImplementedError => e pending e.to_s end s_true 
= NMatrix.new([mn_min,1], [4,3,Math.sqrt(5),0], dtype: a.abs_dtype) u_true = NMatrix.new([m,m], [0,0,1,0, 0,1,0,0, 0,0,0,-1, 1,0,0,0], dtype: dtype) vt_true = NMatrix.new([n,n], [0,1,0,0,0, 0,0,1,0,0, Math.sqrt(0.2),0,0,0,Math.sqrt(0.8), 0,0,0,1,0, -Math.sqrt(0.8),0,0,0,Math.sqrt(0.2)], dtype: dtype) err = case dtype when :float32, :complex64 1e-5 when :float64, :complex128 1e-14 end expect(s).to be_within(err).of(s_true) expect(u).to be_within(err).of(u_true) expect(vt).to be_within(err).of(vt_true) end it "calculates eigenvalues and eigenvectors NMatrix::LAPACK.geev (real matrix, complex eigenvalues)" do n = 3 a = NMatrix.new([n,n], [-1,0,0, 0,1,-2, 0,1,-1], dtype: dtype) begin eigenvalues, vl, vr = NMatrix::LAPACK.geev(a) rescue NotImplementedError => e pending e.to_s end eigenvalues_true = NMatrix.new([n,1], [Complex(0,1), -Complex(0,1), -1], dtype: NMatrix.upcast(dtype, :complex64)) vr_true = NMatrix.new([n,n],[0,0,1, 2/Math.sqrt(6),2/Math.sqrt(6),0, Complex(1,-1)/Math.sqrt(6),Complex(1,1)/Math.sqrt(6),0], dtype: NMatrix.upcast(dtype, :complex64)) vl_true = NMatrix.new([n,n],[0,0,1, Complex(-1,1)/Math.sqrt(6),Complex(-1,-1)/Math.sqrt(6),0, 2/Math.sqrt(6),2/Math.sqrt(6),0], dtype: NMatrix.upcast(dtype, :complex64)) err = case dtype when :float32, :complex64 1e-6 when :float64, :complex128 1e-15 end expect(eigenvalues).to be_within(err).of(eigenvalues_true) expect(vr).to be_within(err).of(vr_true) expect(vl).to be_within(err).of(vl_true) expect(eigenvalues.dtype).to eq(NMatrix.upcast(dtype, :complex64)) expect(vr.dtype).to eq(NMatrix.upcast(dtype, :complex64)) expect(vl.dtype).to eq(NMatrix.upcast(dtype, :complex64)) end it "calculates eigenvalues and eigenvectors NMatrix::LAPACK.geev (real matrix, real eigenvalues)" do n = 3 a = NMatrix.new([n,n], [2,0,0, 0,3,2, 0,1,2], dtype: dtype) begin eigenvalues, vl, vr = NMatrix::LAPACK.geev(a) rescue NotImplementedError => e pending e.to_s end eigenvalues_true = NMatrix.new([n,1], [1, 4, 2], dtype: dtype) # For some 
reason, some of the eigenvectors have different signs # when we use the complex versions of geev. This is totally fine, since # they are still normalized eigenvectors even with the sign flipped. if a.complex_dtype? vr_true = NMatrix.new([n,n],[0,0,1, 1/Math.sqrt(2),2/Math.sqrt(5),0, -1/Math.sqrt(2),1/Math.sqrt(5),0], dtype: dtype) vl_true = NMatrix.new([n,n],[0,0,1, -1/Math.sqrt(5),1/Math.sqrt(2),0, 2/Math.sqrt(5),1/Math.sqrt(2),0], dtype: dtype) else vr_true = NMatrix.new([n,n],[0,0,1, 1/Math.sqrt(2),-2/Math.sqrt(5),0, -1/Math.sqrt(2),-1/Math.sqrt(5),0], dtype: dtype) vl_true = NMatrix.new([n,n],[0,0,1, 1/Math.sqrt(5),-1/Math.sqrt(2),0, -2/Math.sqrt(5),-1/Math.sqrt(2),0], dtype: dtype) end err = case dtype when :float32, :complex64 1e-6 when :float64, :complex128 1e-15 end expect(eigenvalues).to be_within(err).of(eigenvalues_true) expect(vr).to be_within(err).of(vr_true) expect(vl).to be_within(err).of(vl_true) expect(eigenvalues.dtype).to eq(dtype) expect(vr.dtype).to eq(dtype) expect(vl.dtype).to eq(dtype) end it "calculates eigenvalues and eigenvectors NMatrix::LAPACK.geev (left eigenvectors only)" do n = 3 a = NMatrix.new([n,n], [-1,0,0, 0,1,-2, 0,1,-1], dtype: dtype) begin eigenvalues, vl = NMatrix::LAPACK.geev(a, :left) rescue NotImplementedError => e pending e.to_s end eigenvalues_true = NMatrix.new([n,1], [Complex(0,1), -Complex(0,1), -1], dtype: NMatrix.upcast(dtype, :complex64)) vl_true = NMatrix.new([n,n],[0,0,1, Complex(-1,1)/Math.sqrt(6),Complex(-1,-1)/Math.sqrt(6),0, 2/Math.sqrt(6),2/Math.sqrt(6),0], dtype: NMatrix.upcast(dtype, :complex64)) err = case dtype when :float32, :complex64 1e-6 when :float64, :complex128 1e-15 end expect(eigenvalues).to be_within(err).of(eigenvalues_true) expect(vl).to be_within(err).of(vl_true) end it "calculates eigenvalues and eigenvectors NMatrix::LAPACK.geev (right eigenvectors only)" do n = 3 a = NMatrix.new([n,n], [-1,0,0, 0,1,-2, 0,1,-1], dtype: dtype) begin eigenvalues, vr = NMatrix::LAPACK.geev(a, :right) rescue 
NotImplementedError => e pending e.to_s end eigenvalues_true = NMatrix.new([n,1], [Complex(0,1), -Complex(0,1), -1], dtype: NMatrix.upcast(dtype, :complex64)) vr_true = NMatrix.new([n,n],[0,0,1, 2/Math.sqrt(6),2/Math.sqrt(6),0, Complex(1,-1)/Math.sqrt(6),Complex(1,1)/Math.sqrt(6),0], dtype: NMatrix.upcast(dtype, :complex64)) err = case dtype when :float32, :complex64 1e-6 when :float64, :complex128 1e-15 end expect(eigenvalues).to be_within(err).of(eigenvalues_true) expect(vr).to be_within(err).of(vr_true) end end end [:complex64, :complex128].each do |dtype| context dtype do it "calculates eigenvalues and eigenvectors NMatrix::LAPACK.geev (complex matrix)" do n = 3 a = NMatrix.new([n,n], [Complex(0,1),0,0, 0,3,2, 0,1,2], dtype: dtype) begin eigenvalues, vl, vr = NMatrix::LAPACK.geev(a) rescue NotImplementedError => e pending e.to_s end eigenvalues_true = NMatrix.new([n,1], [1, 4, Complex(0,1)], dtype: dtype) vr_true = NMatrix.new([n,n],[0,0,1, 1/Math.sqrt(2),2/Math.sqrt(5),0, -1/Math.sqrt(2),1/Math.sqrt(5),0], dtype: dtype) vl_true = NMatrix.new([n,n],[0,0,1, -1/Math.sqrt(5),1/Math.sqrt(2),0, 2/Math.sqrt(5),1/Math.sqrt(2),0], dtype: dtype) err = case dtype when :float32, :complex64 1e-6 when :float64, :complex128 1e-15 end expect(eigenvalues).to be_within(err).of(eigenvalues_true) expect(vr).to be_within(err).of(vr_true) expect(vl).to be_within(err).of(vl_true) end end end end ================================================ FILE: spec/leakcheck.rb ================================================ require "./lib/nmatrix" # Fixed: #n = NMatrix.new(:yale, [8,2], :int64) #m = NMatrix.new(:yale, [2,8], :int64) #100.times do # n.dot(m) #end #GC.start # Remaining: 100.times do |t| n = NMatrix.new(:dense, 1000, :float64) n[0,t] = 1.0 puts n[t,0] end ================================================ FILE: spec/math_spec.rb ================================================ # = NMatrix # # A linear algebra library for scientific computation in Ruby. 
# NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == math_spec.rb # # Tests for non-BLAS and non-LAPACK math functions, or for simplified # versions of unfriendly BLAS and LAPACK functions. # require 'spec_helper' describe "math" do context "elementwise math functions" do [:dense,:list,:yale].each do |stype| context stype do [:int64,:float64].each do |dtype| context dtype do before :each do @size = [2,2] @m = NMatrix.seq(@size, dtype: dtype, stype: stype)+1 @a = @m.to_a.flatten end NMatrix::NMMath::METHODS_ARITY_1.each do |meth| #skip inverse regular trig functions next if meth.to_s.start_with?('a') and (not meth.to_s.end_with?('h')) \ and NMatrix::NMMath::METHODS_ARITY_1.include?( meth.to_s[1...meth.to_s.length].to_sym) next if meth == :atanh if meth == :-@ it "should correctly apply elementwise negation" do expect(@m.send(meth)).to eq N.new(@size, @a.map { |e| -e }, dtype: dtype, stype: stype) end next end it "should correctly apply elementwise #{meth}" do expect(@m.send(meth)).to eq N.new(@size, @a.map{ |e| Math.send(meth, e) }, dtype: :float64, stype: stype) end end NMatrix::NMMath::METHODS_ARITY_2.each do |meth| next if meth == :atan2 it "should correctly apply elementwise #{meth}" do expect(@m.send(meth, @m)).to eq N.new(@size, @a.map{ |e| Math.send(meth, e, e) }, dtype: :float64, stype: stype) end it "should correctly apply elementwise #{meth} with a scalar first arg" do expect(Math.send(meth, 1, @m)).to eq N.new(@size, @a.map { |e| 
Math.send(meth, 1, e) }, dtype: :float64, stype: stype) end it "should correctly apply elementwise #{meth} with a scalar second arg" do expect(@m.send(meth, 1)).to eq N.new(@size, @a.map { |e| Math.send(meth, e, 1) }, dtype: :float64, stype: stype) end end it "should correctly apply elementwise natural log" do expect(@m.log).to eq N.new(@size, [0, Math.log(2), Math.log(3), Math.log(4)], dtype: :float64, stype: stype) end it "should correctly apply elementwise log with arbitrary base" do expect(@m.log(3)).to eq N.new(@size, [0, Math.log(2,3), 1, Math.log(4,3)], dtype: :float64, stype: stype) end context "inverse trig functions" do before :each do @m = NMatrix.seq(@size, dtype: dtype, stype: stype)/4 @a = @m.to_a.flatten end [:asin, :acos, :atan, :atanh].each do |atf| it "should correctly apply elementwise #{atf}" do expect(@m.send(atf)).to eq N.new(@size, @a.map{ |e| Math.send(atf, e) }, dtype: :float64, stype: stype) end end it "should correctly apply elementtwise atan2" do expect(@m.atan2(@m*0+1)).to eq N.new(@size, @a.map { |e| Math.send(:atan2, e, 1) }, dtype: :float64, stype: stype) end it "should correctly apply elementwise atan2 with a scalar first arg" do expect(Math.atan2(1, @m)).to eq N.new(@size, @a.map { |e| Math.send(:atan2, 1, e) }, dtype: :float64, stype: stype) end it "should correctly apply elementwise atan2 with a scalar second arg" do expect(@m.atan2(1)).to eq N.new(@size, @a.map { |e| Math.send(:atan2, e, 1) }, dtype: :float64, stype: stype) end end end end context "Floor and ceil for #{stype}" do [:floor, :ceil].each do |meth| ALL_DTYPES.each do |dtype| context dtype do before :each do @size = [2,2] @m = NMatrix.seq(@size, dtype: dtype, stype: stype)+1 unless jruby? and dtype == :object @a = @m.to_a.flatten end if dtype.to_s.match(/int/) or [:byte, :object].include?(dtype) it "should return #{dtype} for #{dtype}" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
and dtype == :object expect(@m.send(meth)).to eq N.new(@size, @a.map { |e| e.send(meth) }, dtype: dtype, stype: stype) if dtype == :object expect(@m.send(meth).dtype).to eq :object else expect(@m.send(meth).integer_dtype?).to eq true end end elsif dtype.to_s.match(/float/) it "should return dtype int64 for #{dtype}" do expect(@m.send(meth)).to eq N.new(@size, @a.map { |e| e.send(meth) }, dtype: dtype, stype: stype) expect(@m.send(meth).dtype).to eq :int64 end elsif dtype.to_s.match(/complex/) it "should properly calculate #{meth} for #{dtype}" do pending("not yet implemented for NMatrix-JRuby") if jruby? expect(@m.send(meth)).to eq N.new(@size, @a.map { |e| e = Complex(e.real.send(meth), e.imag.send(meth)) }, dtype: dtype, stype: stype) expect(@m.send(meth).dtype).to eq :complex64 if dtype == :complex64 expect(@m.send(meth).dtype).to eq :complex128 if dtype == :complex128 end end end end end end context "#round for #{stype}" do ALL_DTYPES.each do |dtype| context dtype do before :each do @size = [2,2] @mat = NMatrix.new @size, [1.33334, 0.9998, 1.9999, -8.9999], dtype: dtype, stype: stype @ans = @mat.to_a.flatten unless jruby? and dtype == :object end it "rounds" do pending("not yet implemented for NMatrix-JRuby") if jruby? and dtype == :object expect(@mat.round).to eq(N.new(@size, @ans.map { |a| a.round}, dtype: dtype, stype: stype)) end unless(/complex/ =~ dtype) it "rounds with args" do pending("not yet implemented for NMatrix-JRuby") if jruby? expect(@mat.round(2)).to eq(N.new(@size, @ans.map { |a| a.round(2)}, dtype: dtype, stype: stype)) end unless(/complex/ =~ dtype) it "rounds complex with args" do pending("not yet implemented for NMatrix-JRuby") if jruby? puts @mat.round(2) expect(@mat.round(2)).to be_within(0.0001).of(N.new [2,2], @ans.map {|a| Complex(a.real.round(2), a.imag.round(2))},dtype: dtype, stype: stype) end if(/complex/ =~ dtype) it "rounds complex" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
expect(@mat.round).to eq(N.new [2,2], @ans.map {|a| Complex(a.real.round, a.imag.round)},dtype: dtype, stype: stype) end if(/complex/ =~ dtype) end end end end end end NON_INTEGER_DTYPES.each do |dtype| context dtype do before do @m = NMatrix.new([3,4], GETRF_EXAMPLE_ARRAY, dtype: dtype) @err = case dtype when :float32, :complex64 1e-6 when :float64, :complex128 1e-14 end end #haven't check this spec yet. Also it doesn't check all the elements of the matrix. it "should correctly factorize a matrix" do pending("not yet implemented for :object dtype") if dtype == :object pending("not yet implemented for NMatrix-JRuby") if jruby? a = @m.factorize_lu expect(a).to be_within(@err).of(NMatrix.new([3,4], GETRF_SOLUTION_ARRAY, dtype: dtype)) end it "also returns the permutation matrix" do pending("not yet implemented for :object dtype") if dtype == :object pending("not yet implemented for NMatrix-JRuby") if jruby? a, p = @m.factorize_lu perm_matrix: true expect(a).to be_within(@err).of(NMatrix.new([3,4], GETRF_SOLUTION_ARRAY, dtype: dtype)) p_true = NMatrix.new([3,3], [0,0,1,1,0,0,0,1,0], dtype: dtype) expect(p).to eq(p_true) end end end NON_INTEGER_DTYPES.each do |dtype| context dtype do it "calculates cholesky decomposition using potrf (lower)" do #a = NMatrix.new([3,3],[1,1,1, 1,2,2, 1,2,6], dtype: dtype) # We use the matrix # 1 1 1 # 1 2 2 # 1 2 6 # which is symmetric and positive-definite as required, but # we need only store the lower-half of the matrix. pending("not yet implemented for NMatrix-JRuby") if jruby? pending("not yet implemented for :object dtype") if dtype == :object a = NMatrix.new([3,3],[1,0,0, 1,2,0, 1,2,6], dtype: dtype) begin r = a.potrf!(:lower) b = NMatrix.new([3,3],[1,0,0, 1,1,0, 1,1,2], dtype: dtype) expect(a).to eq(b) expect(r).to eq(b) rescue NotImplementedError pending "potrf! 
not implemented without plugins" end end it "calculates cholesky decomposition using potrf (upper)" do pending("not yet implemented for :object dtype") if dtype == :object pending("not yet implemented for NMatrix-JRuby") if jruby? a = NMatrix.new([3,3],[1,1,1, 0,2,2, 0,0,6], dtype: dtype) begin r = a.potrf!(:upper) b = NMatrix.new([3,3],[1,1,1, 0,1,1, 0,0,2], dtype: dtype) expect(a).to eq(b) expect(r).to eq(b) rescue NotImplementedError pending "potrf! not implemented without plugins" end end it "calculates cholesky decomposition using #factorize_cholesky" do pending("not yet implemented for :object dtype") if dtype == :object a = NMatrix.new([3,3],[1,2,1, 2,13,5, 1,5,6], dtype: dtype) begin u,l = a.factorize_cholesky l_true = NMatrix.new([3,3],[1,0,0, 2,3,0, 1,1,2], dtype: dtype) u_true = l_true.transpose expect(u).to eq(u_true) expect(l).to eq(l_true) rescue NotImplementedError pending "potrf! not implemented without plugins" end end end end NON_INTEGER_DTYPES.each do |dtype| context dtype do it "calculates QR decomposition using factorize_qr for a square matrix" do pending("not yet implemented for :object dtype") if dtype == :object a = NMatrix.new(3, [12.0, -51.0, 4.0, 6.0, 167.0, -68.0, -4.0, 24.0, -41.0] , dtype: dtype) q_solution = NMatrix.new([3,3], Q_SOLUTION_ARRAY_2, dtype: dtype) r_solution = NMatrix.new([3,3], [-14.0, -21.0, 14, 0.0, -175, 70, 0.0, 0.0, -35] , dtype: dtype) err = case dtype when :float32, :complex64 1e-4 when :float64, :complex128 1e-13 end begin q,r = a.factorize_qr expect(q).to be_within(err).of(q_solution) expect(r).to be_within(err).of(r_solution) rescue NotImplementedError pending "Suppressing a NotImplementedError when the lapacke plugin is not available" end end it "calculates QR decomposition using factorize_qr for a tall and narrow rectangular matrix" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
pending("not yet implemented for :object dtype") if dtype == :object a = NMatrix.new([4,2], [34.0, 21.0, 23.0, 53.0, 26.0, 346.0, 23.0, 121.0] , dtype: dtype) q_solution = NMatrix.new([4,4], Q_SOLUTION_ARRAY_1, dtype: dtype) r_solution = NMatrix.new([4,2], [-53.75872022286244, -255.06559574252242, 0.0, 269.34836526051555, 0.0, 0.0, 0.0, 0.0] , dtype: dtype) err = case dtype when :float32, :complex64 1e-4 when :float64, :complex128 1e-13 end begin q,r = a.factorize_qr expect(q).to be_within(err).of(q_solution) expect(r).to be_within(err).of(r_solution) rescue NotImplementedError pending "Suppressing a NotImplementedError when the lapacke plugin is not available" end end it "calculates QR decomposition using factorize_qr for a short and wide rectangular matrix" do pending("not yet implemented for NMatrix-JRuby") if jruby? pending("not yet implemented for :object dtype") if dtype == :object a = NMatrix.new([3,4], [123,31,57,81,92,14,17,36,42,34,11,28], dtype: dtype) q_solution = NMatrix.new([3,3], Q_SOLUTION_ARRAY_3, dtype: dtype) r_solution = NMatrix.new([3,4], R_SOLUTION_ARRAY, dtype: dtype) err = case dtype when :float32, :complex64 1e-4 when :float64, :complex128 1e-13 end begin q,r = a.factorize_qr expect(q).to be_within(err).of(q_solution) expect(r).to be_within(err).of(r_solution) rescue NotImplementedError pending "Suppressing a NotImplementedError when the lapacke plugin is not available" end end it "calculates QR decomposition such that A - QR ~ 0" do pending("not yet implemented for :object dtype") if dtype == :object a = NMatrix.new([3,3], [ 9.0, 0.0, 26.0, 12.0, 0.0, -7.0, 0.0, 4.0, 0.0] , dtype: dtype) err = case dtype when :float32, :complex64 1e-4 when :float64, :complex128 1e-13 end begin q,r = a.factorize_qr a_expected = q.dot(r) expect(a_expected).to be_within(err).of(a) rescue NotImplementedError pending "Suppressing a NotImplementedError when the lapacke plugin is not available" end end it "calculates the orthogonal matrix Q in QR decomposition" 
do pending("not yet implemented for :object dtype") if dtype == :object a = N.new([2,2], [34.0, 21, 23, 53] , dtype: dtype) err = case dtype when :float32, :complex64 1e-4 when :float64, :complex128 1e-13 end begin q,r = a.factorize_qr #Q is orthogonal if Q x Q.transpose = I product = q.dot(q.transpose) expect(product[0,0]).to be_within(err).of(1) expect(product[1,0]).to be_within(err).of(0) expect(product[0,1]).to be_within(err).of(0) expect(product[1,1]).to be_within(err).of(1) rescue NotImplementedError pending "Suppressing a NotImplementedError when the lapacke plugin is not available" end end end end ALL_DTYPES.each do |dtype| next if dtype == :byte #doesn't work for unsigned types context dtype do err = case dtype when :float32, :complex64 1e-4 else #integer matrices will return :float64 1e-13 end it "should correctly invert a matrix in place (bang)" do pending("not yet implemented for :object dtype") if dtype == :object a = NMatrix.new(:dense, 5, [1, 8,-9, 7, 5, 0, 1, 0, 4, 4, 0, 0, 1, 2, 5, 0, 0, 0, 1,-5, 0, 0, 0, 0, 1 ], dtype) b = NMatrix.new(:dense, 5, [1,-8, 9, 7, 17, 0, 1, 0,-4,-24, 0, 0, 1,-2,-15, 0, 0, 0, 1, 5, 0, 0, 0, 0, 1,], dtype) if a.integer_dtype? expect{a.invert!}.to raise_error(DataTypeError) else #should return inverse as well as modifying a r = a.invert! expect(a).to be_within(err).of(b) expect(r).to be_within(err).of(b) end end it "should correctly invert a dense matrix out-of-place" do pending("not yet implemented for :object dtype") if dtype == :object a = NMatrix.new(:dense, 3, [1,2,3,0,1,4,5,6,0], dtype) if a.integer_dtype? b = NMatrix.new(:dense, 3, [-24,18,5,20,-15,-4,-5,4,1], :float64) else b = NMatrix.new(:dense, 3, [-24,18,5,20,-15,-4,-5,4,1], dtype) end expect(a.invert).to be_within(err).of(b) end it "should correctly find exact inverse" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
a = NMatrix.new(:dense, 3, [1,2,3,0,1,4,5,6,0], dtype) b = NMatrix.new(:dense, 3, [-24,18,5,20,-15,-4,-5,4,1], dtype) expect(a.exact_inverse).to be_within(err).of(b) end it "should correctly find exact inverse" do pending("not yet implemented for NMatrix-JRuby") if jruby? a = NMatrix.new(:dense, 2, [1,3,3,8], dtype) b = NMatrix.new(:dense, 2, [-8,3,3,-1], dtype) expect(a.exact_inverse).to be_within(err).of(b) end end end NON_INTEGER_DTYPES.each do |dtype| context dtype do err = Complex(1e-3, 1e-3) it "should correctly invert a 2x2 matrix" do pending("not yet implemented for NMatrix-JRuby") if jruby? pending("not yet implemented for :object dtype") if dtype == :object if dtype == :complex64 || dtype == :complex128 a = NMatrix.new([2, 2], [Complex(16, 81), Complex(91, 51), \ Complex(13, 54), Complex(71, 24)], dtype: dtype) b = NMatrix.identity(2, dtype: dtype) begin expect(a.dot(a.pinv)).to be_within(err).of(b) rescue NotImplementedError pending "Suppressing a NotImplementedError when the atlas plugin is not available" end else a = NMatrix.new([2, 2], [141, 612, 9123, 654], dtype: dtype) b = NMatrix.identity(2, dtype: dtype) begin expect(a.dot(a.pinv)).to be_within(err).of(b) rescue NotImplementedError pending "Suppressing a NotImplementedError when the atlas plugin is not available" end end end it "should verify a.dot(b.dot(a)) == a and b.dot(a.dot(b)) == b" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
pending("not yet implemented for :object dtype") if dtype == :object if dtype == :complex64 || dtype == :complex128 a = NMatrix.new([3, 2], [Complex(94, 11), Complex(87, 51), Complex(82, 39), \ Complex(45, 16), Complex(25, 32), Complex(91, 43) ], dtype: dtype) begin b = a.pinv # pseudo inverse expect(a.dot(b.dot(a))).to be_within(err).of(a) expect(b.dot(a.dot(b))).to be_within(err).of(b) rescue NotImplementedError pending "Suppressing a NotImplementedError when the atlas plugin is not available" end else a = NMatrix.new([3, 3], [9, 4, 52, 12, 52, 1, 3, 55, 6], dtype: dtype) begin b = a.pinv # pseudo inverse expect(a.dot(b.dot(a))).to be_within(err).of(a) expect(b.dot(a.dot(b))).to be_within(err).of(b) rescue NotImplementedError pending "Suppressing a NotImplementedError when the atlas plugin is not available" end end end end end ALL_DTYPES.each do |dtype| next if dtype == :byte #doesn't work for unsigned types context dtype do err = case dtype when :float32, :complex64 1e-4 else #integer matrices will return :float64 1e-13 end it "should correctly find adjugate a matrix in place (bang)" do pending("not yet implemented for :object dtype") if dtype == :object a = NMatrix.new(:dense, 2, [2, 3, 3, 5], dtype) b = NMatrix.new(:dense, 2, [5, -3, -3, 2], dtype) if a.integer_dtype? expect{a.adjugate!}.to raise_error(DataTypeError) else #should return adjugate as well as modifying a r = a.adjugate! expect(a).to be_within(err).of(b) expect(r).to be_within(err).of(b) end end it "should correctly find adjugate of a matrix out-of-place" do pending("not yet implemented for :object dtype") if dtype == :object a = NMatrix.new(:dense, 3, [-3, 2, -5, -1, 0, -2, 3, -4, 1], dtype) if a.integer_dtype? 
b = NMatrix.new(:dense, 3, [-8, 18, -4, -5, 12, -1, 4, -6, 2], :float64) else b = NMatrix.new(:dense, 3, [-8, 18, -4, -5, 12, -1, 4, -6, 2], dtype) end expect(a.adjoint).to be_within(err).of(b) expect(a.adjugate).to be_within(err).of(b) end end end # TODO: Get it working with ROBJ too [:byte,:int8,:int16,:int32,:int64,:float32,:float64].each do |left_dtype| [:byte,:int8,:int16,:int32,:int64,:float32,:float64].each do |right_dtype| # Won't work if they're both 1-byte, due to overflow. next if [:byte,:int8].include?(left_dtype) && [:byte,:int8].include?(right_dtype) # For now, don't bother testing int-int mult. #next if [:int8,:int16,:int32,:int64].include?(left_dtype) && [:int8,:int16,:int32,:int64].include?(right_dtype) it "dense handles #{left_dtype.to_s} dot #{right_dtype.to_s} matrix multiplication" do #STDERR.puts "dtype=#{dtype.to_s}" #STDERR.puts "2" nary = if left_dtype.to_s =~ /complex/ COMPLEX_MATRIX43A_ARRAY else MATRIX43A_ARRAY end mary = if right_dtype.to_s =~ /complex/ COMPLEX_MATRIX32A_ARRAY else MATRIX32A_ARRAY end n = NMatrix.new([4,3], nary, dtype: left_dtype, stype: :dense) m = NMatrix.new([3,2], mary, dtype: right_dtype, stype: :dense) expect(m.shape[0]).to eq(3) expect(m.shape[1]).to eq(2) expect(m.dim).to eq(2) expect(n.shape[0]).to eq(4) expect(n.shape[1]).to eq(3) expect(n.dim).to eq(2) expect(n.shape[1]).to eq(m.shape[0]) r = n.dot m expect(r[0,0]).to eq(273.0) expect(r[0,1]).to eq(455.0) expect(r[1,0]).to eq(243.0) expect(r[1,1]).to eq(235.0) expect(r[2,0]).to eq(244.0) expect(r[2,1]).to eq(205.0) expect(r[3,0]).to eq(102.0) expect(r[3,1]).to eq(160.0) #r.dtype.should == :float64 unless left_dtype == :float32 && right_dtype == :float32 end end end [:byte,:int8,:int16,:int32,:int64,:float32,:float64].each do |left_dtype| [:byte,:int8,:int16,:int32,:int64,:float32,:float64].each do |right_dtype| # Won't work if they're both 1-byte, due to overflow. 
next if [:byte,:int8].include?(left_dtype) && [:byte,:int8].include?(right_dtype) it "dense handles #{left_dtype.to_s} dot #{right_dtype.to_s} vector multiplication" do #STDERR.puts "dtype=#{dtype.to_s}" #STDERR.puts "2" n = NMatrix.new([4,3], [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0], dtype: left_dtype) m = NMatrix.new([3,1], [2.0, 1.0, 0.0], dtype: right_dtype) expect(m.shape[0]).to eq(3) expect(m.shape[1]).to eq(1) expect(n.shape[0]).to eq(4) expect(n.shape[1]).to eq(3) expect(n.dim).to eq(2) expect(n.shape[1]).to eq(m.shape[0]) r = n.dot m # r.class.should == NVector expect(r[0,0]).to eq(4) expect(r[1,0]).to eq(13) expect(r[2,0]).to eq(22) expect(r[3,0]).to eq(31) #r.dtype.should == :float64 unless left_dtype == :float32 && right_dtype == :float32 end end end ALL_DTYPES.each do |dtype| next if integer_dtype?(dtype) context "#cov dtype #{dtype}" do before do @n = NMatrix.new( [5,3], [4.0,2.0,0.60, 4.2,2.1,0.59, 3.9,2.0,0.58, 4.3,2.1,0.62, 4.1,2.2,0.63], dtype: dtype) end it "calculates sample covariance matrix" do pending("not yet implemented for NMatrix-JRuby") if jruby? and dtype == :object expect(@n.cov).to be_within(0.0001).of(NMatrix.new([3,3], [0.025 , 0.0075, 0.00175, 0.0075, 0.007 , 0.00135, 0.00175, 0.00135 , 0.00043 ], dtype: dtype) ) end it "calculates population covariance matrix" do pending("not yet implemented for NMatrix-JRuby") if jruby? and dtype == :object expect(@n.cov(for_sample_data: false)).to be_within(0.0001).of(NMatrix.new([3,3], [2.0000e-02, 6.0000e-03, 1.4000e-03, 6.0000e-03, 5.6000e-03, 1.0800e-03, 1.4000e-03, 1.0800e-03, 3.4400e-04], dtype: dtype) ) end end context "#corr #{dtype}" do it "calculates the correlation matrix" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
and dtype == :object n = NMatrix.new([5,3], [4.0,2.0,0.60, 4.2,2.1,0.59, 3.9,2.0,0.58, 4.3,2.1,0.62, 4.1,2.2,0.63], dtype: dtype) expect(n.corr).to be_within(0.001).of(NMatrix.new([3,3], [1.00000, 0.56695, 0.53374, 0.56695, 1.00000, 0.77813, 0.53374, 0.77813, 1.00000], dtype: dtype)) end unless dtype =~ /complex/ end context "#symmetric? for #{dtype}" do it "should return true for symmetric matrix" do n = NMatrix.new([3,3], [1.00000, 0.56695, 0.53374, 0.56695, 1.00000, 0.77813, 0.53374, 0.77813, 1.00000], dtype: dtype) expect(n.symmetric?).to be_truthy end end context "#hermitian? for #{dtype}" do it "should return true for complex hermitian or non-complex symmetric matrix" do n = NMatrix.new([3,3], [1.00000, 0.56695, 0.53374, 0.56695, 1.00000, 0.77813, 0.53374, 0.77813, 1.00000], dtype: dtype) unless dtype =~ /complex/ n = NMatrix.new([3,3], [1.1, Complex(1.2,1.3), Complex(1.4,1.5), Complex(1.2,-1.3), 1.9, Complex(1.8,1.7), Complex(1.4,-1.5), Complex(1.8,-1.7), 1.3], dtype: dtype) if dtype =~ /complex/ expect(n.hermitian?).to be_truthy end end context "#permute_columns for #{dtype}" do it "check that #permute_columns works correctly by considering every premutation of a 3x3 matrix" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
n = NMatrix.new([3,3], [1,0,0, 0,2,0, 0,0,3], dtype: dtype) expect(n.permute_columns([0,1,2], {convention: :intuitive})).to eq(NMatrix.new([3,3], [1,0,0, 0,2,0, 0,0,3], dtype: dtype)) expect(n.permute_columns([0,2,1], {convention: :intuitive})).to eq(NMatrix.new([3,3], [1,0,0, 0,0,2, 0,3,0], dtype: dtype)) expect(n.permute_columns([1,0,2], {convention: :intuitive})).to eq(NMatrix.new([3,3], [0,1,0, 2,0,0, 0,0,3], dtype: dtype)) expect(n.permute_columns([1,2,0], {convention: :intuitive})).to eq(NMatrix.new([3,3], [0,0,1, 2,0,0, 0,3,0], dtype: dtype)) expect(n.permute_columns([2,0,1], {convention: :intuitive})).to eq(NMatrix.new([3,3], [0,1,0, 0,0,2, 3,0,0], dtype: dtype)) expect(n.permute_columns([2,1,0], {convention: :intuitive})).to eq(NMatrix.new([3,3], [0,0,1, 0,2,0, 3,0,0], dtype: dtype)) expect(n.permute_columns([0,1,2], {convention: :lapack})).to eq(NMatrix.new([3,3], [1,0,0, 0,2,0, 0,0,3], dtype: dtype)) expect(n.permute_columns([0,2,2], {convention: :lapack})).to eq(NMatrix.new([3,3], [1,0,0, 0,0,2, 0,3,0], dtype: dtype)) expect(n.permute_columns([1,1,2], {convention: :lapack})).to eq(NMatrix.new([3,3], [0,1,0, 2,0,0, 0,0,3], dtype: dtype)) expect(n.permute_columns([1,2,2], {convention: :lapack})).to eq(NMatrix.new([3,3], [0,0,1, 2,0,0, 0,3,0], dtype: dtype)) expect(n.permute_columns([2,2,2], {convention: :lapack})).to eq(NMatrix.new([3,3], [0,1,0, 0,0,2, 3,0,0], dtype: dtype)) expect(n.permute_columns([2,1,2], {convention: :lapack})).to eq(NMatrix.new([3,3], [0,0,1, 0,2,0, 3,0,0], dtype: dtype)) end it "additional tests for #permute_columns with convention :intuitive" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
m = NMatrix.new([1,4], [0,1,2,3], dtype: dtype) perm = [1,0,3,2] expect(m.permute_columns(perm, {convention: :intuitive})).to eq(NMatrix.new([1,4], perm, dtype: dtype)) m = NMatrix.new([1,5], [0,1,2,3,4], dtype: dtype) perm = [1,0,4,3,2] expect(m.permute_columns(perm, {convention: :intuitive})).to eq(NMatrix.new([1,5], perm, dtype: dtype)) m = NMatrix.new([1,6], [0,1,2,3,4,5], dtype: dtype) perm = [2,4,1,0,5,3] expect(m.permute_columns(perm, {convention: :intuitive})).to eq(NMatrix.new([1,6], perm, dtype: dtype)) m = NMatrix.new([1,7], [0,1,2,3,4,5,6], dtype: dtype) perm = [1,3,5,6,0,2,4] expect(m.permute_columns(perm, {convention: :intuitive})).to eq(NMatrix.new([1,7], perm, dtype: dtype)) m = NMatrix.new([1,8], [0,1,2,3,4,5,6,7], dtype: dtype) perm = [6,7,5,4,1,3,0,2] expect(m.permute_columns(perm, {convention: :intuitive})).to eq(NMatrix.new([1,8], perm, dtype: dtype)) end end end context "#solve" do NON_INTEGER_DTYPES.each do |dtype| it "solves linear equation for dtype #{dtype}" do pending("not yet implemented for :object dtype") if dtype == :object pending("not yet implemented for NMatrix-JRuby") if jruby? a = NMatrix.new [2,2], [3,1,1,2], dtype: dtype b = NMatrix.new [2,1], [9,8], dtype: dtype expect(a.solve(b)).to eq(NMatrix.new [2,1], [2,3], dtype: dtype) end it "solves linear equation for #{dtype} (non-symmetric matrix)" do pending("not yet implemented for :object dtype") if dtype == :object pending("not yet implemented for NMatrix-JRuby") if jruby? a = NMatrix.new [3,3], [1,1,1, -1,0,1, 3,4,6], dtype: dtype b = NMatrix.new [3,1], [6,2,29], dtype: dtype err = case dtype when :float32, :complex64 1e-5 else 1e-14 end expect(a.solve(b)).to be_within(err).of(NMatrix.new([3,1], [1,2,3], dtype: dtype)) end it "solves linear equation for dtype #{dtype} (non-vector rhs)" do pending("not yet implemented for :object dtype") if dtype == :object pending("not yet implemented for NMatrix-JRuby") if jruby? 
a = NMatrix.new [3,3], [1,0,0, -1,0,1, 2,1,1], dtype: dtype b = NMatrix.new [3,2], [1,0, 1,2, 4,2], dtype: dtype expect(a.solve(b)).to eq(NMatrix.new [3,2], [1,0, 0,0, 2,2], dtype: dtype) end end FLOAT_DTYPES.each do |dtype| context "when form: :lower_tri" do let(:a) { NMatrix.new([3,3], [1, 0, 0, 2, 0.5, 0, 3, 3, 9], dtype: dtype) } it "solves a lower triangular linear system A * x = b with vector b" do pending("not yet implemented for NMatrix-JRuby") if jruby? b = NMatrix.new([3,1], [1,2,3], dtype: dtype) x = a.solve(b, form: :lower_tri) r = a.dot(x) - b expect(r.abs.max).to be_within(1e-6).of(0.0) end it "solves a lower triangular linear system A * X = B with narrow B" do pending("not yet implemented for NMatrix-JRuby") if jruby? b = NMatrix.new([3,2], [1,2,3,4,5,6], dtype: dtype) x = a.solve(b, form: :lower_tri) r = (a.dot(x) - b).abs.to_flat_a expect(r.max).to be_within(1e-6).of(0.0) end it "solves a lower triangular linear system A * X = B with wide B" do pending("not yet implemented for NMatrix-JRuby") if jruby? b = NMatrix.new([3,5], (1..15).to_a, dtype: dtype) x = a.solve(b, form: :lower_tri) r = (a.dot(x) - b).abs.to_flat_a expect(r.max).to be_within(1e-6).of(0.0) end end context "when form: :upper_tri" do let(:a) { NMatrix.new([3,3], [3, 2, 1, 0, 2, 0.5, 0, 0, 9], dtype: dtype) } it "solves an upper triangular linear system A * x = b with vector b" do pending("not yet implemented for NMatrix-JRuby") if jruby? b = NMatrix.new([3,1], [1,2,3], dtype: dtype) x = a.solve(b, form: :upper_tri) r = a.dot(x) - b expect(r.abs.max).to be_within(1e-6).of(0.0) end it "solves an upper triangular linear system A * X = B with narrow B" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
b = NMatrix.new([3,2], [1,2,3,4,5,6], dtype: dtype) x = a.solve(b, form: :upper_tri) r = (a.dot(x) - b).abs.to_flat_a expect(r.max).to be_within(1e-6).of(0.0) end it "solves an upper triangular linear system A * X = B with a wide B" do pending("not yet implemented for NMatrix-JRuby") if jruby? b = NMatrix.new([3,5], (1..15).to_a, dtype: dtype) x = a.solve(b, form: :upper_tri) r = (a.dot(x) - b).abs.to_flat_a expect(r.max).to be_within(1e-6).of(0.0) end end context "when form: :pos_def" do let(:a) { NMatrix.new([3,3], [4, 1, 2, 1, 5, 3, 2, 3, 6], dtype: dtype) } it "solves a linear system A * X = b with positive definite A and vector b" do b = NMatrix.new([3,1], [6,4,8], dtype: dtype) pending("not yet implemented for NMatrix-JRuby") if jruby? begin x = a.solve(b, form: :pos_def) expect(x).to be_within(1e-6).of(NMatrix.new([3,1], [1,0,1], dtype: dtype)) rescue NotImplementedError "Suppressing a NotImplementedError when the lapacke or atlas plugin is not available" end end it "solves a linear system A * X = B with positive definite A and matrix B" do b = NMatrix.new([3,2], [8,3,14,13,14,19], dtype: dtype) pending("not yet implemented for NMatrix-JRuby") if jruby? begin x = a.solve(b, form: :pos_def) expect(x).to be_within(1e-6).of(NMatrix.new([3,2], [1,-1,2,1,1,3], dtype: dtype)) rescue NotImplementedError "Suppressing a NotImplementedError when the lapacke or atlas plugin is not available" end end end end end context "#least_squares" do it "finds the least squares approximation to the equation A * X = B" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
a = NMatrix.new([3,2], [2.0, 0, -1, 1, 0, 2]) b = NMatrix.new([3,1], [1.0, 0, -1]) solution = NMatrix.new([2,1], [1.0 / 3 , -1.0 / 3], dtype: :float64) begin least_squares = a.least_squares(b) expect(least_squares).to be_within(0.0001).of solution rescue NotImplementedError "Suppressing a NotImplementedError when the lapacke or atlas plugin is not available" end end it "finds the least squares approximation to the equation A * X = B with high tolerance" do pending("not yet implemented for NMatrix-JRuby") if jruby? a = NMatrix.new([4,2], [1.0, 1, 1, 2, 1, 3,1,4]) b = NMatrix.new([4,1], [6.0, 5, 7, 10]) solution = NMatrix.new([2,1], [3.5 , 1.4], dtype: :float64) begin least_squares = a.least_squares(b, tolerance: 10e-5) expect(least_squares).to be_within(0.0001).of solution rescue NotImplementedError "Suppressing a NotImplementedError when the lapacke or atlas plugin is not available" end end end context "#hessenberg" do FLOAT_DTYPES.each do |dtype| context dtype do before do @n = NMatrix.new [5,5], [0, 2, 0, 1, 1, 2, 2, 3, 2, 2, 4,-3, 0, 1, 3, 6, 1,-6,-5, 4, 5, 6, 4, 1, 5], dtype: dtype end it "transforms a matrix to Hessenberg form" do pending("not yet implemented for NMatrix-JRuby") if jruby? expect(@n.hessenberg).to be_within(0.0001).of(NMatrix.new([5,5], [0.00000,-1.66667, 0.79432,-0.45191,-1.54501, -9.00000, 2.95062,-6.89312, 3.22250,-0.19012, 0.00000,-8.21682,-0.57379, 5.26966,-1.69976, 0.00000, 0.00000,-3.74630,-0.80893, 3.99708, 0.00000, 0.00000, 0.00000, 0.04102, 0.43211], dtype: dtype)) end end end end ALL_DTYPES.each do |dtype| [:dense, :yale].each do |stype| answer_dtype = integer_dtype?(dtype) ? :int64 : dtype next if dtype == :byte context "#pow #{dtype} #{stype}" do before do @n = NMatrix.new [4,4], [0, 2, 0, 1, 2, 2, 3, 2, 4,-3, 0, 1, 6, 1,-6,-5], dtype: dtype, stype: stype end it "raises a square matrix to even power" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
and dtype == :object expect(@n.pow(4)).to eq(NMatrix.new([4,4], [292, 28,-63, -42, 360, 96, 51, -14, 448,-231,-24,-87, -1168, 595,234, 523], dtype: answer_dtype, stype: stype)) end it "raises a square matrix to odd power" do pending("not yet implemented for NMatrix-JRuby") if jruby? and dtype == :object expect(@n.pow(9)).to eq(NMatrix.new([4,4],[-275128, 279917, 176127, 237451, -260104, 394759, 166893, 296081, -704824, 285700, 186411, 262002, 3209256,-1070870,-918741,-1318584], dtype: answer_dtype, stype: stype)) end it "raises a sqaure matrix to negative power" do expect(@n.pow(-3)).to be_within(0.00001).of (NMatrix.new([4,4], [1.0647e-02, 4.2239e-04,-6.2281e-05, 2.7680e-03, -1.6415e-02, 2.1296e-02, 1.0718e-02, 4.8589e-03, 8.6956e-03,-8.6569e-03, 2.8993e-02, 7.2015e-03, 5.0034e-02,-1.7500e-02,-3.6777e-02,-1.2128e-02], dtype: answer_dtype, stype: stype)) end unless stype =~ /yale/ or dtype == :object or ALL_DTYPES.grep(/int/).include? dtype it "raises a square matrix to zero" do pending("not yet implemented for NMatrix-JRuby") if jruby? and dtype == :object expect(@n.pow(0)).to eq(NMatrix.eye([4,4], dtype: answer_dtype, stype: stype)) end it "raises a square matrix to one" do pending("not yet implemented for NMatrix-JRuby") if jruby? and dtype == :object expect(@n.pow(1)).to eq(@n) end end end end ALL_DTYPES.each do |dtype| [:dense, :yale].each do |stype| context "#kron_prod #{dtype} #{stype}" do before do @a = NMatrix.new([2,2], [1,2, 3,4], dtype: dtype, stype: stype) @b = NMatrix.new([2,3], [1,1,1, 1,1,1], dtype: dtype, stype: stype) @c = NMatrix.new([4,6], [1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 3, 3, 3, 4, 4, 4], dtype: dtype, stype: stype) end it "computes the Kronecker product of two NMatrix objects" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
and dtype == :object expect(@a.kron_prod(@b)).to eq(@c) end end end end context "determinants" do ALL_DTYPES.each do |dtype| context dtype do pending("not yet implemented for :object dtype") if dtype == :object before do @a = NMatrix.new([2,2], [1,2, 3,4], dtype: dtype) @b = NMatrix.new([3,3], [1,2,3, 5,0,1, 4,1,3], dtype: dtype) @c = NMatrix.new([4,4], [1, 0, 1, 1, 1, 2, 3, 1, 3, 3, 3, 1, 1, 2, 3, 4], dtype: dtype) @err = case dtype when :float32, :complex64 1e-6 when :float64, :complex128 1e-14 else 1e-64 # FIXME: should be 0, but be_within(0) does not work. end end it "computes the determinant of 2x2 matrix" do pending("not yet implemented for :object dtype") if dtype == :object expect(@a.det).to be_within(@err).of(-2) end it "computes the determinant of 3x3 matrix" do pending("not yet implemented for :object dtype") if dtype == :object expect(@b.det).to be_within(@err).of(-8) end it "computes the determinant of 4x4 matrix" do pending("not yet implemented for :object dtype") if dtype == :object expect(@c.det).to be_within(@err).of(-18) end it "computes the exact determinant of 2x2 matrix" do pending("not yet implemented for :object dtype") if dtype == :object if dtype == :byte expect{@a.det_exact}.to raise_error(DataTypeError) else pending("not yet implemented for NMatrix-JRuby") if jruby? and dtype == :object expect(@a.det_exact).to be_within(@err).of(-2) end end it "computes the exact determinant of 3x3 matrix" do pending("not yet implemented for :object dtype") if dtype == :objectx if dtype == :byte expect{@a.det_exact}.to raise_error(DataTypeError) else pending("not yet implemented for NMatrix-JRuby") if jruby? and dtype == :object expect(@b.det_exact).to be_within(@err).of(-8) end end end end end context "#scale and #scale!" 
do [:dense,:list,:yale].each do |stype| ALL_DTYPES.each do |dtype| context "for #{dtype}" do before do @m = NMatrix.new([3, 3], [0, 1, 2, 3, 4, 5, 6, 7, 8], stype: stype, dtype: dtype) end it "scales the matrix by a given factor and return the result" do pending("not yet implemented for :object dtype") if dtype == :object if integer_dtype? dtype expect{@m.scale 2.0}.to raise_error(DataTypeError) else pending("not yet implemented for NMatrix-JRuby") if jruby? and (dtype == :complex64 || dtype == :complex128) expect(@m.scale 2.0).to eq(NMatrix.new([3, 3], [0, 2, 4, 6, 8, 10, 12, 14, 16], stype: stype, dtype: dtype)) end end it "scales the matrix in place by a given factor" do pending("not yet implemented for :object dtype") if dtype == :object if dtype == :int8 expect{@m.scale! 2}.to raise_error(DataTypeError) else pending("not yet implemented for NMatrix-JRuby") if jruby? and (dtype == :complex64 || dtype == :complex128) @m.scale! 2 expect(@m).to eq(NMatrix.new([3, 3], [0, 2, 4, 6, 8, 10, 12, 14, 16], stype: stype, dtype: dtype)) end end end end end end context "matrix_norm" do ALL_DTYPES.each do |dtype| context dtype do pending("not yet implemented for :object dtype") if dtype == :object before do @n = NMatrix.new([3,3], [-4,-3,-2, -1, 0, 1, 2, 3, 4], dtype: dtype) @matrix_norm_TOLERANCE = 1.0e-10 end it "should default to 2-matrix_norm" do pending("not yet implemented for NMatrix-JRuby") if jruby? if(dtype == :byte) expect{@n.matrix_norm}.to raise_error(ArgumentError) else begin expect(@n.matrix_norm).to be_within(@matrix_norm_TOLERANCE).of(7.348469228349535) rescue NotImplementedError pending "Suppressing a NotImplementedError when the lapacke plugin is not available" end end end it "should reject invalid arguments" do pending("not yet implemented for NMatrix-JRuby") if jruby? expect{@n.matrix_norm(0.5)}.to raise_error(ArgumentError) end it "should calculate 1 and 2(minus) matrix_norms correctly" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
if(dtype == :byte) expect{@n.matrix_norm(1)}.to raise_error(ArgumentError) expect{@n.matrix_norm(-2)}.to raise_error(ArgumentError) expect{@n.matrix_norm(-1)}.to raise_error(ArgumentError) else expect(@n.matrix_norm(1)).to eq(7) begin #FIXME: change to the correct value when overflow issue is resolved #expect(@n.matrix_norm(-2)).to eq(1.8628605857884395e-07) expect(@n.matrix_norm(-2)).to be_within(@matrix_norm_TOLERANCE).of(0.0) rescue NotImplementedError pending "Suppressing a NotImplementedError when the lapacke plugin is not available" end expect(@n.matrix_norm(-1)).to eq(6) end end it "should calculate infinity matrix_norms correctly" do pending("not yet implemented for NMatrix-JRuby") if jruby? if(dtype == :byte) expect{@n.matrix_norm(:inf)}.to raise_error(ArgumentError) expect{@n.matrix_norm(:'-inf')}.to raise_error(ArgumentError) else expect(@n.matrix_norm(:inf)).to eq(9) expect(@n.matrix_norm(:'-inf')).to eq(2) end end it "should calculate frobenius matrix_norms correctly" do pending("not yet implemented for NMatrix-JRuby") if jruby? if(dtype == :byte) expect{@n.matrix_norm(:fro)}.to raise_error(ArgumentError) else expect(@n.matrix_norm(:fro)).to be_within(@matrix_norm_TOLERANCE).of(7.745966692414834) end end end end end context "#positive_definite?" do it "should return true for positive_definite? matrix" do n = NMatrix.new([3,3], [2, -1, -1, -1, 2, -1, -1, -1, 3]) expect(n.positive_definite?).to be_truthy end end context "#svd_rank" do FLOAT_DTYPES.each do |dtype| context dtype do #examples from https://www.cliffsnotes.com/study-guides/algebra/linear-algebra/real-euclidean-vector-spaces/the-rank-of-a-matrix it "calculates the rank of matrix using singular value decomposition with NMatrix on rectangular matrix without tolerence" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
a = NMatrix.new([4,3],[2,-1,3, 1,0,1, 0,2,-1, 1,1,4], dtype: dtype) begin rank = a.svd_rank() rank_true = 3 expect(rank).to eq (rank_true) rescue NotImplementedError pending "Suppressing a NotImplementedError when the lapacke plugin is not available" end end it "calculates the rank of matrix using singular value decomposition with NMatrix on rectangular matrix with tolerence" do a = NMatrix.new([4,3],[2,-1,3, 1,0,1, 0,2,-1, 1,1,4], dtype: dtype) pending("not yet implemented for NMatrix-JRuby") if jruby? begin rank = a.svd_rank(4) rank_true = 1 expect(rank).to eq (rank_true) rescue NotImplementedError pending "Suppressing a NotImplementedError when the lapacke plugin is not available" end end it "calculates the rank of matrix using singular value decomposition with NMatrix on square matrix without tolerence" do a = NMatrix.new([4,4],[1,-1,1,-1, -1,1,-1,1, 1,-1,1,-1, -1,1,-1,1], dtype: dtype) pending("not yet implemented for NMatrix-JRuby") if jruby? begin rank = a.svd_rank() rank_true = 1 expect(rank).to eq (rank_true) rescue NotImplementedError pending "Suppressing a NotImplementedError when the lapacke plugin is not available" end end it "calculates the rank of matrix using singular value decomposition with NMatrix on square matrix with very small tolerence(for float32)" do pending("not yet implemented for NMatrix-JRuby") if jruby? a = NMatrix.new([4,4],[1,-1,1,-1, -1,1,-1,1, 1,-1,1,-1, -1,1,-1,1], dtype: :float32) begin rank = a.svd_rank(1.7881389169360773e-08) rank_true = 2 expect(rank).to eq (rank_true) rescue NotImplementedError pending "Suppressing a NotImplementedError when the lapacke plugin is not available" end end it "calculates the rank of matrix using singular value decomposition with NMatrix on square matrix with very small tolerence(for float64)" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
a = NMatrix.new([4,4],[1,-1,1,-1, -1,1,-1,1, 1,-1,1,-1, -1,1,-1,1], dtype: :float64) begin rank = a.svd_rank(1.7881389169360773e-08) rank_true = 1 expect(rank).to eq (rank_true) rescue NotImplementedError pending "Suppressing a NotImplementedError when the lapacke plugin is not available" end end end end end end ================================================ FILE: spec/nmatrix_yale_resize_test_associations.yaml ================================================ --- 0: 0 1: 0 2: 1 3: 1 4: 2 5: 437 6: 2 7: 347 8: 3 9: 52 10: 3 11: 590 12: 3 13: 562 14: 562 15: 5 16: 5 17: 405 18: 603 19: 186 20: 7 21: 347 22: 7 23: 8 24: 497 25: 9 26: 570 27: 10 28: 10 29: 11 30: 11 31: 11 32: 12 33: 12 34: 13 35: 212 36: 248 37: 428 38: 458 39: 448 40: 428 41: 14 42: 14 43: 14 44: 14 45: 580 46: 15 47: 612 48: 453 49: 16 50: 328 51: 16 52: 41 53: 24 54: 616 55: 616 56: 616 57: 616 58: 616 59: 17 60: 165 61: 579 62: 19 63: 19 64: 19 65: 19 66: 19 67: 20 68: 20 69: 20 70: 572 71: 575 72: 21 73: 594 74: 22 75: 606 76: 22 77: 546 78: 23 79: 23 80: 515 81: 23 82: 24 83: 380 84: 25 85: 26 86: 26 87: 26 88: 26 89: 27 90: 27 91: 27 92: 27 93: 27 94: 27 95: 288 96: 27 97: 27 98: 28 99: 367 100: 29 101: 241 102: 241 103: 29 104: 30 105: 30 106: 30 107: 30 108: 30 109: 30 110: 30 111: 30 112: 30 113: 310 114: 30 115: 598 116: 30 117: 31 118: 31 119: 31 120: 175 121: 529 122: 31 123: 337 124: 31 125: 31 126: 31 127: 475 128: 31 129: 31 130: 31 131: 31 132: 31 133: 31 134: 96 135: 401 136: 31 137: 31 138: 470 139: 31 140: 31 141: 151 142: 32 143: 32 144: 32 145: 32 146: 32 147: 32 148: 32 149: 33 150: 33 151: 33 152: 278 153: 33 154: 34 155: 423 156: 34 157: 34 158: 458 159: 34 160: 549 161: 34 162: 151 163: 34 164: 34 165: 553 166: 34 167: 49 168: 34 169: 112 170: 34 171: 35 172: 35 173: 493 174: 551 175: 35 176: 35 177: 35 178: 520 179: 35 180: 578 181: 36 182: 36 183: 36 184: 36 185: 36 186: 36 187: 36 188: 36 189: 36 190: 36 191: 36 192: 36 193: 171 194: 519 195: 37 196: 38 197: 88 198: 195 
199: 605 200: 39 201: 100 202: 40 203: 82 204: 465 205: 530 206: 322 207: 42 208: 42 209: 42 210: 42 211: 610 212: 503 213: 166 214: 44 215: 44 216: 263 217: 45 218: 558 219: 321 220: 45 221: 45 222: 45 223: 414 224: 367 225: 58 226: 46 227: 46 228: 46 229: 46 230: 47 231: 47 232: 292 233: 515 234: 48 235: 48 236: 48 237: 62 238: 237 239: 48 240: 48 241: 48 242: 48 243: 476 244: 537 245: 48 246: 48 247: 48 248: 48 249: 431 250: 51 251: 48 252: 48 253: 48 254: 49 255: 573 256: 49 257: 49 258: 123 259: 49 260: 49 261: 316 262: 401 263: 291 264: 51 265: 52 266: 476 267: 52 268: 52 269: 53 270: 53 271: 53 272: 333 273: 540 274: 53 275: 53 276: 53 277: 54 278: 54 279: 575 280: 54 281: 96 282: 55 283: 571 284: 55 285: 424 286: 458 287: 56 288: 56 289: 56 290: 56 291: 168 292: 56 293: 56 294: 300 295: 57 296: 509 297: 58 298: 58 299: 58 300: 58 301: 58 302: 58 303: 58 304: 58 305: 58 306: 58 307: 58 308: 59 309: 59 310: 59 311: 59 312: 287 313: 102 314: 59 315: 59 316: 59 317: 59 318: 59 319: 337 320: 212 321: 559 322: 580 323: 525 324: 464 325: 61 326: 61 327: 61 328: 590 329: 63 330: 138 331: 358 332: 64 333: 64 334: 64 335: 64 336: 64 337: 64 338: 64 339: 535 340: 358 341: 64 342: 395 343: 64 344: 64 345: 64 346: 65 347: 65 348: 522 349: 609 350: 65 351: 65 352: 66 353: 66 354: 66 355: 66 356: 66 357: 401 358: 176 359: 67 360: 67 361: 257 362: 360 363: 67 364: 92 365: 67 366: 96 367: 407 368: 543 369: 69 370: 69 371: 322 372: 70 373: 70 374: 70 375: 595 376: 595 377: 595 378: 72 379: 549 380: 72 381: 72 382: 580 383: 453 384: 74 385: 74 386: 74 387: 74 388: 74 389: 75 390: 75 391: 75 392: 75 393: 572 394: 520 395: 417 396: 75 397: 75 398: 75 399: 75 400: 349 401: 75 402: 75 403: 349 404: 75 405: 75 406: 75 407: 75 408: 349 409: 75 410: 551 411: 75 412: 75 413: 75 414: 75 415: 75 416: 75 417: 76 418: 76 419: 77 420: 77 421: 77 422: 77 423: 77 424: 77 425: 77 426: 78 427: 78 428: 79 429: 79 430: 79 431: 445 432: 79 433: 79 434: 79 435: 79 436: 79 437: 79 438: 79 439: 79 
440: 79 441: 79 442: 79 443: 80 444: 571 445: 570 446: 80 447: 80 448: 80 449: 80 450: 81 451: 81 452: 82 453: 82 454: 83 455: 83 456: 83 457: 196 458: 83 459: 364 460: 322 461: 612 462: 492 463: 83 464: 83 465: 448 466: 83 467: 515 468: 448 469: 341 470: 196 471: 83 472: 83 473: 521 474: 83 475: 83 476: 84 477: 84 478: 85 479: 85 480: 85 481: 598 482: 579 483: 577 484: 87 485: 88 486: 88 487: 88 488: 88 489: 88 490: 88 491: 88 492: 88 493: 88 494: 88 495: 88 496: 88 497: 88 498: 338 499: 89 500: 90 501: 516 502: 91 503: 403 504: 92 505: 93 506: 93 507: 93 508: 94 509: 333 510: 95 511: 95 512: 95 513: 211 514: 95 515: 95 516: 96 517: 318 518: 228 519: 96 520: 96 521: 526 522: 96 523: 96 524: 427 525: 96 526: 96 527: 96 528: 607 529: 96 530: 96 531: 534 532: 96 533: 96 534: 96 535: 96 536: 395 537: 96 538: 395 539: 96 540: 149 541: 534 542: 96 543: 583 544: 96 545: 96 546: 96 547: 96 548: 395 549: 96 550: 407 551: 96 552: 97 553: 97 554: 98 555: 98 556: 581 557: 431 558: 99 559: 100 560: 100 561: 100 562: 100 563: 100 564: 407 565: 100 566: 100 567: 100 568: 100 569: 374 570: 100 571: 100 572: 100 573: 100 574: 100 575: 100 576: 100 577: 100 578: 100 579: 100 580: 100 581: 100 582: 100 583: 100 584: 100 585: 100 586: 100 587: 100 588: 100 589: 100 590: 100 591: 100 592: 100 593: 464 594: 419 595: 248 596: 287 597: 102 598: 392 599: 102 600: 102 601: 102 602: 102 603: 102 604: 102 605: 102 606: 102 607: 102 608: 102 609: 103 610: 508 611: 420 612: 606 613: 466 614: 554 615: 104 616: 105 617: 105 618: 105 619: 106 620: 347 621: 107 622: 107 623: 107 624: 108 625: 108 626: 109 627: 615 628: 615 629: 594 630: 110 631: 110 632: 110 633: 110 634: 110 635: 110 636: 110 637: 110 638: 111 639: 112 640: 112 641: 112 642: 112 643: 112 644: 112 645: 112 646: 112 647: 423 648: 112 649: 250 650: 466 651: 112 652: 423 653: 112 654: 527 655: 112 656: 112 657: 407 658: 527 659: 112 660: 423 661: 112 662: 112 663: 423 664: 231 665: 113 666: 113 667: 369 668: 114 669: 114 670: 114 
671: 115 672: 115 673: 115 674: 192 675: 116 676: 229 677: 488 678: 117 679: 117 680: 117 681: 117 682: 118 683: 118 684: 118 685: 118 686: 118 687: 118 688: 118 689: 118 690: 118 691: 118 692: 118 693: 118 694: 119 695: 119 696: 119 697: 228 698: 120 699: 120 700: 419 701: 515 702: 121 703: 121 704: 121 705: 438 706: 122 707: 123 708: 123 709: 554 710: 123 711: 124 712: 124 713: 374 714: 126 715: 347 716: 320 717: 126 718: 126 719: 126 720: 126 721: 126 722: 126 723: 480 724: 126 725: 126 726: 126 727: 126 728: 126 729: 126 730: 509 731: 126 732: 332 733: 379 734: 379 735: 126 736: 388 737: 388 738: 126 739: 126 740: 248 741: 528 742: 126 743: 573 744: 126 745: 126 746: 448 747: 126 748: 126 749: 126 750: 341 751: 126 752: 158 753: 341 754: 127 755: 127 756: 127 757: 127 758: 127 759: 128 760: 128 761: 518 762: 129 763: 426 764: 129 765: 129 766: 317 767: 129 768: 560 769: 317 770: 560 771: 347 772: 518 773: 518 774: 129 775: 518 776: 129 777: 130 778: 597 779: 130 780: 130 781: 130 782: 130 783: 130 784: 131 785: 131 786: 131 787: 131 788: 309 789: 131 790: 131 791: 131 792: 131 793: 131 794: 131 795: 132 796: 132 797: 132 798: 133 799: 133 800: 133 801: 476 802: 133 803: 133 804: 133 805: 133 806: 133 807: 133 808: 592 809: 133 810: 133 811: 133 812: 552 813: 133 814: 133 815: 236 816: 134 817: 134 818: 135 819: 135 820: 135 821: 135 822: 466 823: 466 824: 580 825: 457 826: 138 827: 138 828: 138 829: 138 830: 138 831: 138 832: 138 833: 606 834: 139 835: 139 836: 139 837: 298 838: 522 839: 532 840: 141 841: 141 842: 499 843: 143 844: 143 845: 144 846: 144 847: 144 848: 144 849: 144 850: 347 851: 144 852: 454 853: 145 854: 146 855: 146 856: 147 857: 147 858: 148 859: 148 860: 149 861: 149 862: 149 863: 149 864: 149 865: 149 866: 149 867: 149 868: 149 869: 149 870: 149 871: 149 872: 149 873: 149 874: 149 875: 149 876: 149 877: 149 878: 149 879: 149 880: 149 881: 149 882: 149 883: 149 884: 149 885: 149 886: 149 887: 149 888: 597 889: 149 890: 149 891: 149 892: 149 
893: 149 894: 149 895: 597 896: 149 897: 149 898: 597 899: 149 900: 149 901: 149 902: 149 903: 149 904: 149 905: 597 906: 149 907: 149 908: 149 909: 149 910: 149 911: 149 912: 149 913: 149 914: 149 915: 149 916: 149 917: 149 918: 149 919: 149 920: 169 921: 604 922: 149 923: 149 924: 149 925: 149 926: 190 927: 149 928: 149 929: 149 930: 149 931: 149 932: 149 933: 149 934: 149 935: 149 936: 149 937: 346 938: 149 939: 149 940: 563 941: 597 942: 149 943: 149 944: 149 945: 149 946: 149 947: 149 948: 149 949: 149 950: 150 951: 568 952: 151 953: 458 954: 151 955: 415 956: 152 957: 290 958: 153 959: 153 960: 153 961: 153 962: 153 963: 154 964: 154 965: 155 966: 155 967: 155 968: 156 969: 156 970: 156 971: 157 972: 157 973: 157 974: 157 975: 157 976: 157 977: 157 978: 157 979: 158 980: 158 981: 159 982: 458 983: 586 984: 586 985: 161 986: 162 987: 162 988: 163 989: 163 990: 163 991: 164 992: 579 993: 166 994: 166 995: 166 996: 166 997: 167 998: 167 999: 167 1000: 167 1001: 167 1002: 167 1003: 167 1004: 168 1005: 168 1006: 459 1007: 168 1008: 168 1009: 168 1010: 168 1011: 168 1012: 168 1013: 168 1014: 168 1015: 271 1016: 168 1017: 168 1018: 168 1019: 169 1020: 169 1021: 288 1022: 169 1023: 169 1024: 169 1025: 169 1026: 170 1027: 171 1028: 171 1029: 171 1030: 171 1031: 445 1032: 171 1033: 171 1034: 445 1035: 171 1036: 551 1037: 172 1038: 173 1039: 173 1040: 174 1041: 502 1042: 176 1043: 176 1044: 176 1045: 176 1046: 176 1047: 177 1048: 177 1049: 177 1050: 178 1051: 178 1052: 179 1053: 179 1054: 180 1055: 180 1056: 181 1057: 181 1058: 182 1059: 182 1060: 183 1061: 183 1062: 183 1063: 183 1064: 183 1065: 183 1066: 395 1067: 183 1068: 183 1069: 183 1070: 183 1071: 184 1072: 310 1073: 405 1074: 185 1075: 185 1076: 185 1077: 185 1078: 185 1079: 185 1080: 185 1081: 185 1082: 185 1083: 185 1084: 185 1085: 185 1086: 185 1087: 185 1088: 185 1089: 185 1090: 185 1091: 185 1092: 220 1093: 185 1094: 185 1095: 185 1096: 185 1097: 424 1098: 185 1099: 367 1100: 185 1101: 185 1102: 185 1103: 
185 1104: 185 1105: 185 1106: 185 1107: 402 1108: 185 1109: 185 1110: 185 1111: 377 1112: 187 1113: 185 1114: 185 1115: 185 1116: 185 1117: 298 1118: 185 1119: 402 1120: 185 1121: 185 1122: 185 1123: 185 1124: 185 1125: 186 1126: 558 1127: 249 1128: 249 1129: 186 1130: 187 1131: 187 1132: 187 1133: 187 1134: 187 1135: 187 1136: 576 1137: 576 1138: 189 1139: 189 1140: 189 1141: 576 1142: 190 1143: 190 1144: 604 1145: 191 1146: 191 1147: 191 1148: 191 1149: 337 1150: 192 1151: 192 1152: 193 1153: 193 1154: 193 1155: 194 1156: 367 1157: 194 1158: 194 1159: 195 1160: 195 1161: 195 1162: 195 1163: 196 1164: 196 1165: 196 1166: 196 1167: 196 1168: 196 1169: 196 1170: 196 1171: 196 1172: 196 1173: 196 1174: 196 1175: 197 1176: 197 1177: 198 1178: 198 1179: 198 1180: 198 1181: 198 1182: 199 1183: 199 1184: 199 1185: 199 1186: 199 1187: 199 1188: 200 1189: 200 1190: 200 1191: 200 1192: 429 1193: 201 1194: 201 1195: 202 1196: 591 1197: 518 1198: 203 1199: 612 1200: 204 1201: 204 1202: 205 1203: 205 1204: 206 1205: 206 1206: 207 1207: 207 1208: 208 1209: 208 1210: 209 1211: 209 1212: 210 1213: 210 1214: 210 1215: 545 1216: 211 1217: 211 1218: 212 1219: 212 1220: 492 1221: 428 1222: 471 1223: 213 1224: 214 1225: 368 1226: 215 1227: 215 1228: 217 1229: 578 1230: 437 1231: 464 1232: 218 1233: 218 1234: 218 1235: 218 1236: 218 1237: 218 1238: 219 1239: 218 1240: 218 1241: 218 1242: 218 1243: 219 1244: 220 1245: 220 1246: 220 1247: 220 1248: 360 1249: 221 1250: 221 1251: 222 1252: 222 1253: 222 1254: 223 1255: 223 1256: 224 1257: 225 1258: 225 1259: 225 1260: 225 1261: 226 1262: 226 1263: 227 1264: 227 1265: 227 1266: 227 1267: 228 1268: 228 1269: 228 1270: 228 1271: 228 1272: 228 1273: 228 1274: 228 1275: 228 1276: 229 1277: 229 1278: 229 1279: 582 1280: 230 1281: 230 1282: 230 1283: 231 1284: 232 1285: 232 1286: 232 1287: 233 1288: 233 1289: 233 1290: 234 1291: 322 1292: 234 1293: 541 1294: 235 1295: 237 1296: 522 1297: 237 1298: 397 1299: 237 1300: 247 1301: 237 1302: 238 1303: 
240 1304: 240 1305: 340 1306: 240 1307: 242 1308: 242 1309: 242 1310: 242 1311: 243 1312: 243 1313: 243 1314: 243 1315: 243 1316: 243 1317: 243 1318: 401 1319: 243 1320: 243 1321: 244 1322: 245 1323: 245 1324: 341 1325: 245 1326: 579 1327: 247 1328: 247 1329: 539 1330: 248 1331: 248 1332: 250 1333: 250 1334: 250 1335: 250 1336: 251 1337: 251 1338: 515 1339: 251 1340: 251 1341: 251 1342: 251 1343: 251 1344: 252 1345: 252 1346: 253 1347: 253 1348: 254 1349: 254 1350: 254 1351: 255 1352: 255 1353: 255 1354: 255 1355: 255 1356: 255 1357: 255 1358: 255 1359: 255 1360: 255 1361: 330 1362: 255 1363: 256 1364: 256 1365: 256 1366: 606 1367: 274 1368: 474 1369: 257 1370: 257 1371: 257 1372: 288 1373: 257 1374: 257 1375: 419 1376: 372 1377: 258 1378: 258 1379: 554 1380: 259 1381: 259 1382: 260 1383: 260 1384: 261 1385: 261 1386: 261 1387: 486 1388: 262 1389: 262 1390: 263 1391: 263 1392: 263 1393: 263 1394: 335 1395: 263 1396: 278 1397: 264 1398: 264 1399: 383 1400: 264 1401: 264 1402: 265 1403: 265 1404: 265 1405: 265 1406: 343 1407: 265 1408: 280 1409: 266 1410: 266 1411: 266 1412: 267 1413: 267 1414: 592 1415: 269 1416: 269 1417: 279 1418: 270 1419: 270 1420: 271 1421: 271 1422: 271 1423: 271 1424: 272 1425: 272 1426: 273 1427: 515 1428: 349 1429: 274 1430: 274 1431: 508 1432: 274 1433: 274 1434: 274 1435: 274 1436: 274 1437: 274 1438: 274 1439: 274 1440: 274 1441: 274 1442: 274 1443: 274 1444: 274 1445: 274 1446: 508 1447: 274 1448: 496 1449: 274 1450: 580 1451: 276 1452: 276 1453: 277 1454: 518 1455: 277 1456: 277 1457: 278 1458: 279 1459: 280 1460: 485 1461: 281 1462: 281 1463: 281 1464: 281 1465: 281 1466: 307 1467: 281 1468: 352 1469: 409 1470: 282 1471: 283 1472: 283 1473: 283 1474: 283 1475: 283 1476: 284 1477: 284 1478: 284 1479: 284 1480: 284 1481: 285 1482: 592 1483: 285 1484: 285 1485: 285 1486: 285 1487: 285 1488: 286 1489: 553 1490: 287 1491: 287 1492: 287 1493: 287 1494: 287 1495: 288 1496: 288 1497: 288 1498: 288 1499: 288 1500: 288 1501: 289 1502: 596 1503: 
290 1504: 290 1505: 290 1506: 290 1507: 290 1508: 290 1509: 290 1510: 290 1511: 290 1512: 290 1513: 290 1514: 291 1515: 291 1516: 341 1517: 293 1518: 293 1519: 293 1520: 293 1521: 293 1522: 293 1523: 293 1524: 293 1525: 293 1526: 293 1527: 539 1528: 293 1529: 293 1530: 294 1531: 294 1532: 295 1533: 295 1534: 295 1535: 296 1536: 296 1537: 296 1538: 296 1539: 296 1540: 296 1541: 296 1542: 296 1543: 296 1544: 296 1545: 296 1546: 296 1547: 296 1548: 296 1549: 296 1550: 296 1551: 296 1552: 296 1553: 296 1554: 572 1555: 296 1556: 296 1557: 296 1558: 360 1559: 459 1560: 299 1561: 299 1562: 299 1563: 300 1564: 302 1565: 302 1566: 302 1567: 333 1568: 305 1569: 305 1570: 358 1571: 358 1572: 535 1573: 480 1574: 480 1575: 307 1576: 307 1577: 309 1578: 309 1579: 309 1580: 310 1581: 310 1582: 310 1583: 311 1584: 311 1585: 312 1586: 313 1587: 314 1588: 314 1589: 314 1590: 315 1591: 315 1592: 316 1593: 317 1594: 317 1595: 518 1596: 317 1597: 518 1598: 518 1599: 317 1600: 317 1601: 518 1602: 318 1603: 506 1604: 318 1605: 380 1606: 380 1607: 318 1608: 380 1609: 355 1610: 318 1611: 318 1612: 318 1613: 318 1614: 319 1615: 319 1616: 500 1617: 320 1618: 320 1619: 321 1620: 321 1621: 322 1622: 322 1623: 322 1624: 322 1625: 322 1626: 322 1627: 322 1628: 322 1629: 322 1630: 322 1631: 322 1632: 322 1633: 322 1634: 322 1635: 322 1636: 322 1637: 322 1638: 322 1639: 429 1640: 322 1641: 322 1642: 322 1643: 322 1644: 322 1645: 322 1646: 322 1647: 322 1648: 322 1649: 448 1650: 322 1651: 329 1652: 522 1653: 322 1654: 322 1655: 322 1656: 322 1657: 322 1658: 322 1659: 322 1660: 322 1661: 322 1662: 322 1663: 322 1664: 322 1665: 322 1666: 322 1667: 322 1668: 322 1669: 322 1670: 322 1671: 322 1672: 322 1673: 322 1674: 322 1675: 322 1676: 322 1677: 549 1678: 323 1679: 323 1680: 323 1681: 323 1682: 449 1683: 324 1684: 324 1685: 449 1686: 449 1687: 324 1688: 324 1689: 325 1690: 453 1691: 496 1692: 496 1693: 327 1694: 327 1695: 423 1696: 327 1697: 328 1698: 329 1699: 329 1700: 566 1701: 566 1702: 331 1703: 
331 1704: 332 1705: 540 1706: 540 1707: 333 1708: 333 1709: 334 1710: 334 1711: 335 1712: 336 1713: 563 1714: 336 1715: 336 1716: 442 1717: 424 1718: 337 1719: 337 1720: 337 1721: 461 1722: 337 1723: 337 1724: 337 1725: 337 1726: 337 1727: 338 1728: 600 1729: 591 1730: 340 1731: 340 1732: 341 1733: 341 1734: 492 1735: 341 1736: 341 1737: 342 1738: 342 1739: 453 1740: 509 1741: 343 1742: 344 1743: 345 1744: 345 1745: 347 1746: 347 1747: 347 1748: 347 1749: 347 1750: 347 1751: 347 1752: 560 1753: 347 1754: 347 1755: 347 1756: 603 1757: 348 1758: 348 1759: 348 1760: 348 1761: 348 1762: 348 1763: 348 1764: 348 1765: 348 1766: 348 1767: 348 1768: 348 1769: 572 1770: 590 1771: 349 1772: 349 1773: 349 1774: 350 1775: 350 1776: 350 1777: 351 1778: 351 1779: 351 1780: 352 1781: 352 1782: 352 1783: 352 1784: 352 1785: 353 1786: 354 1787: 354 1788: 355 1789: 355 1790: 355 1791: 357 1792: 357 1793: 357 1794: 359 1795: 359 1796: 360 1797: 361 1798: 361 1799: 362 1800: 362 1801: 363 1802: 363 1803: 364 1804: 364 1805: 364 1806: 574 1807: 364 1808: 364 1809: 606 1810: 365 1811: 366 1812: 366 1813: 422 1814: 366 1815: 367 1816: 367 1817: 367 1818: 367 1819: 367 1820: 367 1821: 367 1822: 367 1823: 367 1824: 367 1825: 367 1826: 367 1827: 367 1828: 367 1829: 367 1830: 367 1831: 367 1832: 367 1833: 367 1834: 367 1835: 367 1836: 367 1837: 367 1838: 367 1839: 367 1840: 367 1841: 367 1842: 367 1843: 367 1844: 367 1845: 367 1846: 367 1847: 367 1848: 367 1849: 367 1850: 367 1851: 367 1852: 367 1853: 367 1854: 367 1855: 367 1856: 367 1857: 367 1858: 367 1859: 367 1860: 367 1861: 367 1862: 367 1863: 367 1864: 367 1865: 367 1866: 367 1867: 367 1868: 367 1869: 367 1870: 367 1871: 367 1872: 367 1873: 367 1874: 367 1875: 367 1876: 367 1877: 367 1878: 367 1879: 367 1880: 367 1881: 367 1882: 367 1883: 367 1884: 367 1885: 367 1886: 367 1887: 367 1888: 367 1889: 367 1890: 367 1891: 367 1892: 367 1893: 367 1894: 367 1895: 367 1896: 367 1897: 367 1898: 367 1899: 368 1900: 369 1901: 558 1902: 371 1903: 
371 1904: 371 1905: 371 1906: 371 1907: 372 1908: 372 1909: 372 1910: 372 1911: 372 1912: 372 1913: 373 1914: 373 1915: 373 1916: 373 1917: 373 1918: 373 1919: 373 1920: 373 1921: 373 1922: 373 1923: 374 1924: 374 1925: 374 1926: 438 1927: 374 1928: 374 1929: 437 1930: 374 1931: 374 1932: 374 1933: 374 1934: 374 1935: 374 1936: 374 1937: 374 1938: 374 1939: 374 1940: 375 1941: 375 1942: 376 1943: 376 1944: 377 1945: 377 1946: 377 1947: 377 1948: 377 1949: 377 1950: 377 1951: 377 1952: 377 1953: 377 1954: 377 1955: 378 1956: 378 1957: 378 1958: 571 1959: 378 1960: 378 1961: 378 1962: 378 1963: 380 1964: 380 1965: 380 1966: 380 1967: 380 1968: 380 1969: 380 1970: 380 1971: 380 1972: 380 1973: 380 1974: 380 1975: 380 1976: 380 1977: 380 1978: 554 1979: 380 1980: 380 1981: 381 1982: 381 1983: 381 1984: 382 1985: 382 1986: 383 1987: 383 1988: 384 1989: 384 1990: 384 1991: 385 1992: 385 1993: 385 1994: 386 1995: 386 1996: 387 1997: 387 1998: 387 1999: 389 2000: 390 2001: 390 2002: 396 2003: 396 2004: 396 2005: 392 2006: 539 2007: 392 2008: 532 2009: 392 2010: 392 2011: 393 2012: 393 2013: 394 2014: 607 2015: 395 2016: 395 2017: 395 2018: 395 2019: 407 2020: 395 2021: 395 2022: 395 2023: 407 2024: 607 2025: 607 2026: 395 2027: 395 2028: 395 2029: 396 2030: 396 2031: 396 2032: 396 2033: 396 2034: 396 2035: 396 2036: 396 2037: 397 2038: 397 2039: 397 2040: 397 2041: 397 2042: 397 2043: 399 2044: 399 2045: 399 2046: 400 2047: 400 2048: 401 2049: 401 2050: 401 2051: 579 2052: 401 2053: 401 2054: 401 2055: 401 2056: 401 2057: 402 2058: 403 2059: 404 2060: 404 2061: 578 2062: 404 2063: 404 2064: 404 2065: 406 2066: 574 2067: 407 2068: 407 2069: 407 2070: 407 2071: 407 2072: 407 2073: 407 2074: 407 2075: 407 2076: 534 2077: 407 2078: 408 2079: 408 2080: 408 2081: 408 2082: 408 2083: 408 2084: 408 2085: 408 2086: 408 2087: 408 2088: 408 2089: 408 2090: 408 2091: 408 2092: 409 2093: 409 2094: 409 2095: 410 2096: 410 2097: 411 2098: 411 2099: 411 2100: 411 2101: 411 2102: 412 2103: 
413 2104: 413 2105: 414 2106: 414 2107: 414 2108: 414 2109: 415 2110: 415 2111: 415 2112: 508 2113: 416 2114: 416 2115: 535 2116: 416 2117: 417 2118: 417 2119: 417 2120: 418 2121: 418 2122: 418 2123: 418 2124: 419 2125: 420 2126: 420 2127: 420 2128: 421 2129: 421 2130: 492 2131: 422 2132: 423 2133: 423 2134: 423 2135: 423 2136: 496 2137: 423 2138: 423 2139: 423 2140: 423 2141: 424 2142: 425 2143: 425 2144: 425 2145: 518 2146: 518 2147: 426 2148: 426 2149: 426 2150: 426 2151: 427 2152: 428 2153: 428 2154: 429 2155: 430 2156: 430 2157: 430 2158: 430 2159: 430 2160: 431 2161: 431 2162: 431 2163: 431 2164: 431 2165: 441 2166: 431 2167: 431 2168: 431 2169: 431 2170: 431 2171: 431 2172: 431 2173: 431 2174: 431 2175: 431 2176: 431 2177: 431 2178: 431 2179: 431 2180: 431 2181: 431 2182: 431 2183: 431 2184: 431 2185: 432 2186: 546 2187: 433 2188: 434 2189: 518 2190: 539 2191: 539 2192: 539 2193: 436 2194: 436 2195: 508 2196: 437 2197: 437 2198: 437 2199: 437 2200: 438 2201: 438 2202: 438 2203: 555 2204: 617 2205: 584 2206: 438 2207: 438 2208: 439 2209: 439 2210: 440 2211: 442 2212: 442 2213: 442 2214: 443 2215: 443 2216: 444 2217: 451 2218: 444 2219: 444 2220: 444 2221: 444 2222: 444 2223: 445 2224: 445 2225: 445 2226: 445 2227: 524 2228: 445 2229: 447 2230: 447 2231: 447 2232: 447 2233: 447 2234: 447 2235: 447 2236: 448 2237: 448 2238: 487 2239: 448 2240: 448 2241: 448 2242: 448 2243: 452 2244: 450 2245: 450 2246: 450 2247: 451 2248: 451 2249: 451 2250: 451 2251: 452 2252: 452 2253: 453 2254: 453 2255: 453 2256: 453 2257: 453 2258: 453 2259: 454 2260: 454 2261: 454 2262: 454 2263: 515 2264: 455 2265: 455 2266: 455 2267: 456 2268: 457 2269: 458 2270: 458 2271: 458 2272: 458 2273: 458 2274: 458 2275: 458 2276: 458 2277: 458 2278: 458 2279: 458 2280: 458 2281: 458 2282: 458 2283: 458 2284: 459 2285: 459 2286: 459 2287: 459 2288: 460 2289: 460 2290: 461 2291: 462 2292: 462 2293: 462 2294: 463 2295: 463 2296: 509 2297: 467 2298: 467 2299: 468 2300: 468 2301: 469 2302: 469 2303: 
469 2304: 472 2305: 472 2306: 473 2307: 473 2308: 474 2309: 474 2310: 474 2311: 474 2312: 474 2313: 474 2314: 474 2315: 598 2316: 475 2317: 476 2318: 477 2319: 477 2320: 477 2321: 477 2322: 477 2323: 477 2324: 477 2325: 479 2326: 479 2327: 481 2328: 481 2329: 481 2330: 481 2331: 482 2332: 482 2333: 482 2334: 482 2335: 482 2336: 483 2337: 483 2338: 483 2339: 483 2340: 484 2341: 484 2342: 485 2343: 486 2344: 486 2345: 486 2346: 486 2347: 487 2348: 487 2349: 487 2350: 487 2351: 487 2352: 488 2353: 489 2354: 554 2355: 490 2356: 490 2357: 491 2358: 491 2359: 492 2360: 492 2361: 492 2362: 492 2363: 492 2364: 492 2365: 492 2366: 492 2367: 492 2368: 492 2369: 492 2370: 492 2371: 492 2372: 492 2373: 492 2374: 492 2375: 492 2376: 492 2377: 492 2378: 492 2379: 492 2380: 492 2381: 493 2382: 494 2383: 494 2384: 495 2385: 495 2386: 496 2387: 496 2388: 496 2389: 497 2390: 497 2391: 498 2392: 498 2393: 601 2394: 498 2395: 499 2396: 499 2397: 499 2398: 499 2399: 499 2400: 499 2401: 499 2402: 499 2403: 499 2404: 499 2405: 499 2406: 501 2407: 501 2408: 501 2409: 502 2410: 502 2411: 503 2412: 503 2413: 504 2414: 504 2415: 505 2416: 505 2417: 505 2418: 505 2419: 505 2420: 506 2421: 506 2422: 506 2423: 506 2424: 506 2425: 616 2426: 507 2427: 507 2428: 523 2429: 508 2430: 508 2431: 508 2432: 510 2433: 509 2434: 509 2435: 509 2436: 509 2437: 509 2438: 509 2439: 510 2440: 511 2441: 511 2442: 511 2443: 512 2444: 512 2445: 512 2446: 512 2447: 512 2448: 512 2449: 513 2450: 513 2451: 513 2452: 513 2453: 514 2454: 514 2455: 518 2456: 515 2457: 515 2458: 515 2459: 515 2460: 515 2461: 515 2462: 515 2463: 515 2464: 515 2465: 515 2466: 515 2467: 515 2468: 515 2469: 515 2470: 515 2471: 515 2472: 515 2473: 515 2474: 515 2475: 516 2476: 516 2477: 516 2478: 517 2479: 517 2480: 517 2481: 518 2482: 518 2483: 518 2484: 597 2485: 518 2486: 518 2487: 518 2488: 518 2489: 518 2490: 518 2491: 518 2492: 518 2493: 518 2494: 518 2495: 518 2496: 518 2497: 518 2498: 518 2499: 518 2500: 518 2501: 518 2502: 518 2503: 
518 2504: 518 2505: 518 2506: 518 2507: 518 2508: 518 2509: 518 2510: 519 2511: 520 2512: 521 2513: 521 2514: 522 2515: 522 2516: 522 2517: 522 2518: 522 2519: 572 2520: 572 2521: 523 2522: 524 2523: 525 2524: 526 2525: 528 2526: 529 2527: 530 2528: 531 2529: 531 2530: 531 2531: 532 2532: 532 2533: 533 2534: 533 2535: 533 2536: 533 2537: 535 2538: 535 2539: 537 2540: 538 2541: 538 2542: 539 2543: 539 2544: 539 2545: 539 2546: 539 2547: 539 2548: 539 2549: 541 2550: 541 2551: 541 2552: 541 2553: 542 2554: 543 2555: 544 2556: 612 2557: 545 2558: 546 2559: 546 2560: 546 2561: 546 2562: 546 2563: 546 2564: 546 2565: 546 2566: 547 2567: 547 2568: 548 2569: 548 2570: 549 2571: 549 2572: 549 2573: 549 2574: 549 2575: 549 2576: 549 2577: 549 2578: 549 2579: 549 2580: 549 2581: 549 2582: 549 2583: 549 2584: 549 2585: 549 2586: 549 2587: 549 2588: 549 2589: 549 2590: 549 2591: 549 2592: 549 2593: 549 2594: 549 2595: 549 2596: 549 2597: 550 2598: 550 2599: 550 2600: 550 2601: 550 2602: 551 2603: 551 2604: 552 2605: 553 2606: 553 2607: 553 2608: 553 2609: 553 2610: 553 2611: 554 2612: 554 2613: 554 2614: 554 2615: 554 2616: 554 2617: 554 2618: 554 2619: 554 2620: 554 2621: 554 2622: 554 2623: 554 2624: 554 2625: 554 2626: 554 2627: 554 2628: 554 2629: 554 2630: 554 2631: 554 2632: 554 2633: 554 2634: 554 2635: 554 2636: 554 2637: 554 2638: 555 2639: 555 2640: 555 2641: 555 2642: 555 2643: 555 2644: 555 2645: 555 2646: 556 2647: 557 2648: 557 2649: 557 2650: 558 2651: 558 2652: 558 2653: 558 2654: 559 2655: 559 2656: 561 2657: 561 2658: 561 2659: 561 2660: 561 2661: 561 2662: 561 2663: 561 2664: 561 2665: 561 2666: 561 2667: 561 2668: 561 2669: 561 2670: 561 2671: 564 2672: 564 2673: 565 2674: 565 2675: 567 2676: 568 2677: 568 2678: 569 2679: 569 2680: 569 2681: 571 2682: 572 2683: 572 2684: 572 2685: 572 2686: 572 2687: 572 2688: 572 2689: 572 2690: 572 2691: 572 2692: 572 2693: 572 2694: 572 2695: 572 2696: 572 2697: 572 2698: 572 2699: 572 2700: 572 2701: 573 2702: 573 2703: 
574 2704: 575 2705: 576 2706: 576 2707: 577 2708: 578 2709: 578 2710: 578 2711: 578 2712: 578 2713: 578 2714: 578 2715: 578 2716: 579 2717: 579 2718: 579 2719: 579 2720: 579 2721: 579 2722: 580 2723: 580 2724: 580 2725: 580 2726: 580 2727: 580 2728: 580 2729: 580 2730: 580 2731: 580 2732: 580 2733: 581 2734: 582 2735: 582 2736: 582 2737: 582 2738: 583 2739: 584 2740: 584 2741: 584 2742: 585 2743: 585 2744: 586 2745: 587 2746: 587 2747: 588 2748: 588 2749: 615 2750: 589 2751: 589 2752: 590 2753: 592 2754: 593 2755: 593 2756: 596 2757: 596 2758: 597 2759: 597 2760: 597 2761: 597 2762: 598 2763: 599 2764: 599 2765: 600 2766: 608 2767: 602 2768: 602 2769: 603 2770: 605 2771: 606 2772: 606 2773: 606 2774: 606 2775: 607 2776: 608 2777: 609 2778: 610 2779: 611 2780: 611 2781: 612 2782: 612 2783: 612 2784: 613 2785: 613 2786: 614 2787: 614 2788: 615 2789: 615 2790: 615 2791: 615 2792: 615 2793: 615 2794: 616 2795: 616 2796: 616 2797: 616 2798: 616 2799: 616 2800: 617 ================================================ FILE: spec/nmatrix_yale_spec.rb ================================================ # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == nmatrix_yale_spec.rb # # Basic tests for NMatrix's Yale storage type. 
# require 'spec_helper' require "./lib/nmatrix" describe NMatrix do context :yale do it "compares two empty matrices" do n = NMatrix.new(4, stype: :yale, dtype: :float64) m = NMatrix.new(4, stype: :yale, dtype: :float64) expect(n).to eq(m) end it "compares two matrices following basic assignments" do n = NMatrix.new(2, stype: :yale, dtype: :float64) m = NMatrix.new(2, stype: :yale, dtype: :float64) m[0,0] = 1 m[0,1] = 1 expect(n).not_to eq(m) n[0,0] = 1 expect(n).not_to eq(m) n[0,1] = 1 expect(n).to eq(m) end it "compares two matrices following elementwise operations" do n = NMatrix.new(2, stype: :yale, dtype: :float64) m = NMatrix.new(2, stype: :yale, dtype: :float64) n[0,1] = 1 m[0,1] = -1 x = n+m expect(n+m).to eq(NMatrix.new(2, 0.0, stype: :yale)) end it "sets diagonal values" do n = NMatrix.new([2,3], stype: :yale, dtype: :float64) n.extend(NMatrix::YaleFunctions) n[1,1] = 0.1 n[0,0] = 0.2 expect(n.yale_d).to eq([0.2, 0.1]) end it "gets non-diagonal rows as hashes" do n = NMatrix.new([4,6], stype: :yale, dtype: :float64) n.extend(NMatrix::YaleFunctions) n[0,0] = 0.1 n[0,2] = 0.2 n[0,3] = 0.3 n[1,5] = 0.4 h = n.yale_nd_row(0, :hash) expect(h).to eq({2 => 0.2, 3 => 0.3}) end it "gets non-diagonal occupied column indices for a given row" do n = NMatrix.new([4,6], stype: :yale, dtype: :float64) n.extend(NMatrix::YaleFunctions) n[0,0] = 0.1 n[0,2] = 0.2 n[0,3] = 0.3 n[1,5] = 0.4 a = n.yale_nd_row(0, :array) expect(a).to eq([2,3]) end it "does not resize until necessary" do n = NMatrix.new([2,3], stype: :yale, dtype: :float64) n.extend(NMatrix::YaleFunctions) expect(n.yale_size).to eq(3) expect(n.capacity).to eq(5) n[0,0] = 0.1 n[0,1] = 0.2 n[1,0] = 0.3 expect(n.yale_size).to eq(5) expect(n.capacity).to eq(5) end it "sets when not resizing" do n = NMatrix.new([2,3], stype: :yale, dtype: :float64) n.extend(NMatrix::YaleFunctions) n[0,0] = 0.1 n[0,1] = 0.2 n[1,0] = 0.3 expect(n.yale_a).to eq([0.1, 0.0, 0.0, 0.2, 0.3]) expect(n.yale_ija).to eq([3,4,5,1,0]) end it "sets 
when resizing" do n = NMatrix.new([2,3], stype: :yale, dtype: :float64) n.extend(NMatrix::YaleFunctions) n[0,0] = 0.01 n[1,1] = 0.1 n[0,1] = 0.2 n[1,0] = 0.3 n[1,2] = 0.4 expect(n.yale_d).to eq([0.01, 0.1]) expect(n.yale_ia).to eq([3,4,6]) expect(n.yale_ja).to eq([1,0,2,nil]) expect(n.yale_lu).to eq([0.2, 0.3, 0.4, nil]) end it "resizes without erasing values" do require 'yaml' associations = File.open('spec/nmatrix_yale_resize_test_associations.yaml') { |y| YAML::load(y) } n = NMatrix.new([618,2801], stype: :yale, dtype: :byte, capacity: associations.size) #n = NMatrix.new(:yale, [618, 2801], associations.size, :byte) associations.each_pair do |j,i| n[i,j] = 1 expect(n[i,j]).to be(1), "Value at #{i},#{j} not inserted correctly!" end associations.each_pair do |j,i| expect(n[i,j]).to be(1), "Value at #{i},#{j} erased during resize!" end end it "sets values within rows" do n = NMatrix.new([3,20], stype: :yale, dtype: :float64) n.extend(NMatrix::YaleFunctions) n[2,1] = 1.0 n[2,0] = 1.5 n[2,15] = 2.0 expect(n.yale_lu).to eq([1.5, 1.0, 2.0]) expect(n.yale_ja).to eq([0, 1, 15]) end it "gets values within rows" do n = NMatrix.new([3,20], stype: :yale, dtype: :float64) n[2,1] = 1.0 n[2,0] = 1.5 n[2,15] = 2.0 expect(n[2,1]).to eq(1.0) expect(n[2,0]).to eq(1.5) expect(n[2,15]).to eq(2.0) end it "sets values within large rows" do n = NMatrix.new([10,300], stype: :yale, dtype: :float64) n.extend(NMatrix::YaleFunctions) n[5,1] = 1.0 n[5,0] = 1.5 n[5,15] = 2.0 n[5,291] = 3.0 n[5,292] = 4.0 n[5,289] = 5.0 n[5,290] = 6.0 n[5,293] = 2.0 n[5,299] = 7.0 n[5,100] = 8.0 expect(n.yale_lu).to eq([1.5, 1.0, 2.0, 8.0, 5.0, 6.0, 3.0, 4.0, 2.0, 7.0]) expect(n.yale_ja).to eq([0, 1, 15, 100, 289, 290, 291, 292, 293, 299]) end it "gets values within large rows" do n = NMatrix.new([10,300], stype: :yale, dtype: :float64) n.extend(NMatrix::YaleFunctions) n[5,1] = 1.0 n[5,0] = 1.5 n[5,15] = 2.0 n[5,291] = 3.0 n[5,292] = 4.0 n[5,289] = 5.0 n[5,290] = 6.0 n[5,293] = 2.0 n[5,299] = 7.0 n[5,100] = 8.0 
n.yale_ja.each_index do |idx| j = n.yale_ja[idx] expect(n[5,j]).to eq(n.yale_lu[idx]) end end it "dots two identical matrices" do a = NMatrix.new(4, stype: :yale, dtype: :float64) a[0,1] = 4.0 a[1,2] = 1.0 a[1,3] = 1.0 a[3,1] = 2.0 b = a.dup c = a.dot b d = NMatrix.new(4, [0,0,4,4, 0,2,0,0, 0,0,0,0, 0,0,2,2], dtype: :float64, stype: :yale) expect(c).to eq(d) end it "dots two identical matrices where a positive and negative partial sum cancel on the diagonal" do a = NMatrix.new(4, 0.0, stype: :yale) a[0,0] = 1.0 a[0,1] = 4.0 a[1,2] = 2.0 a[1,3] = -4.0 a[3,1] = 4.0 a[3,3] = 4.0 b = a.dup c = a.dot b c.extend(NMatrix::YaleFunctions) expect(c.yale_ija.reject { |i| i.nil? }).to eq([5,8,9,9,11,1,2,3,3,1,2]) expect(c.yale_a.reject { |i| i.nil? }).to eq([1.0, -16.0, 0.0, 0.0, 0.0, 4.0, 8.0, -16.0, -16.0, 16.0, 8.0]) end it "dots two vectors" do n = NMatrix.new([16,1], 0, stype: :yale) m = NMatrix.new([1,16], 0, stype: :yale) n[0] = m[0] = 1 n[1] = m[1] = 2 n[2] = m[2] = 3 n[3] = m[3] = 4 n[4] = m[4] = 5 n[5] = m[5] = 6 n[6] = m[6] = 7 n[7] = m[7] = 8 n[8] = m[8] = 9 n[15] = m[15] = 16 nm = n.dot(m) # Perform the same multiplication with dense nmr = n.cast(:dense, :int64).dot(m.cast(:dense, :int64)).cast(:yale, :int64) nm.extend(NMatrix::YaleFunctions) nmr.extend(NMatrix::YaleFunctions) # We want to do a structure comparison to ensure multiplication is occurring properly, but more importantly, to # ensure that insertion sort is occurring as it should. If the row has more than four entries, it'll run quicksort # instead. Quicksort calls insertion sort for small rows, so we test both with this particular multiplication. 
expect(nm.yale_ija[0...107]).to eq(nmr.yale_ija[0...107]) expect(nm.yale_a[0...107]).to eq(nmr.yale_a[0...107]) mn = m.dot(n) expect(mn[0,0]).to eq(541) end it "calculates the row key intersections of two matrices" do a = NMatrix.new([3,9], [0,1], stype: :yale, dtype: :byte, default: 0) b = NMatrix.new([3,9], [0,0,1,0,1], stype: :yale, dtype: :byte, default: 0) a.extend NMatrix::YaleFunctions b.extend NMatrix::YaleFunctions (0...3).each do |ai| (0...3).each do |bi| STDERR.puts (a.yale_ja_d_keys_at(ai) & b.yale_ja_d_keys_at(bi)).inspect expect(a.yale_ja_d_keys_at(ai) & b.yale_ja_d_keys_at(bi)).to eq(a.yale_row_keys_intersection(ai, b, bi)) end end end end end ================================================ FILE: spec/plugins/atlas/atlas_spec.rb ================================================ # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. 
#
# == Contributing
#
# By contributing source code to SciRuby, you agree to be bound by
# our Contributor Agreement:
#
# * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
#
# == atlas_spec.rb
#
# Tests for interfaces that are only exposed by nmatrix-atlas
#

require 'spec_helper'
require "./lib/nmatrix/atlas"

describe "NMatrix::LAPACK implementation from nmatrix-atlas plugin" do
  [:float32, :float64, :complex64, :complex128].each do |dtype|
    context dtype do
      it "exposes clapack_getri" do
        a = NMatrix.new(:dense, 3, [1,0,4,1,1,6,-3,0,-10], dtype)
        ipiv = NMatrix::LAPACK::clapack_getrf(:row, 3, 3, a, 3) # get pivot from getrf, use for getri

        begin
          NMatrix::LAPACK::clapack_getri(:row, 3, a, 3, ipiv)

          b = NMatrix.new(:dense, 3, [-5,0,-2,-4,1,-1,1.5,0,0.5], dtype)
          expect(a).to eq(b)
        rescue NotImplementedError => e
          # getri is not available in every build; mark pending instead of failing.
          pending e.to_s
        end
      end

      # potrf decomposes a symmetric (or Hermitian)
      # positive-definite matrix. The matrix tested below isn't symmetric.
      # But this is okay since potrf just examines the upper/lower half
      # (as requested) of the matrix and assumes that the rest is symmetric,
      # so we just set the other part of the matrix to zero.
      it "exposes clapack_potrf upper" do
        pending "potrf requires clapack" unless NMatrix.has_clapack?

        a = NMatrix.new(:dense, 3, [25,15,-5, 0,18,0, 0,0,11], dtype)
        NMatrix::LAPACK::clapack_potrf(:row, :upper, 3, a, 3)
        b = NMatrix.new(:dense, 3, [5,3,-1, 0,3,1, 0,0,3], dtype)
        expect(a).to eq(b)
      end

      it "exposes clapack_potrf lower" do
        pending "potrf requires clapack" unless NMatrix.has_clapack?

        a = NMatrix.new(:dense, 3, [25,0,0, 15,18,0,-5,0,11], dtype)
        NMatrix::LAPACK::clapack_potrf(:row, :lower, 3, a, 3)
        b = NMatrix.new(:dense, 3, [5,0,0, 3,3,0, -1,1,3], dtype)
        expect(a).to eq(b)
      end

      it "exposes clapack_potri" do
        pending "potri requires clapack" unless NMatrix.has_clapack?

        a = NMatrix.new(3, [4, 0,-1, 0, 2, 1, 0, 0, 1], dtype: dtype)
        NMatrix::LAPACK::clapack_potrf(:row, :upper, 3, a, 3)
        NMatrix::LAPACK::clapack_potri(:row, :upper, 3, a, 3)
        b = NMatrix.new(3, [0.5, -0.5, 1, 0, 1.5, -2, 0, 0, 4], dtype: dtype)
        # Tolerance depends on single vs. double precision.
        err = case dtype
                when :float32, :complex64
                  1e-6
                when :float64, :complex128
                  1e-14
              end
        expect(a).to be_within(err).of(b)
      end

      it "exposes clapack_potrs" do
        pending "potrs requires clapack" unless NMatrix.has_clapack?

        a = NMatrix.new(3, [4, 0,-1, 0, 2, 1, 0, 0, 1], dtype: dtype)
        b = NMatrix.new([3,1], [3,0,2], dtype: dtype)

        NMatrix::LAPACK::clapack_potrf(:row, :upper, 3, a, 3)
        NMatrix::LAPACK::clapack_potrs(:row, :upper, 3, 1, a, 3, b, 3)

        x = NMatrix.new([3,1], [3.5, -5.5, 11], dtype: dtype)
        err = case dtype
                when :float32, :complex64
                  1e-5
                when :float64, :complex128
                  1e-14
              end
        expect(b).to be_within(err).of(x)
      end
    end
  end

  [:float32, :float64, :complex64, :complex128].each do |dtype|
    context dtype do
      it "calculates the singular value decomposition with lapack_gesvd" do
        #example from Wikipedia
        m = 4
        n = 5
        mn_min = [m,n].min
        a = NMatrix.new([m,n],[1,0,0,0,2, 0,0,3,0,0, 0,0,0,0,0, 0,4,0,0,0], dtype: dtype)
        s = NMatrix.new([mn_min], 0, dtype: a.abs_dtype) #s is always real and always returned as float/double, never as complex
        u = NMatrix.new([m,m], 0, dtype: dtype)
        vt = NMatrix.new([n,n], 0, dtype: dtype)

        # This is a pure LAPACK function so it expects column-major functions
        # So we need to transpose the input as well as the output
        a = a.transpose
        NMatrix::LAPACK.lapack_gesvd(:a, :a, m, n, a, m, s, u, m, vt, n, 500)
        u = u.transpose
        vt = vt.transpose

        s_true = NMatrix.new([mn_min], [4,3,Math.sqrt(5),0], dtype: a.abs_dtype)
        u_true = NMatrix.new([m,m], [0,0,1,0, 0,1,0,0, 0,0,0,-1, 1,0,0,0], dtype: dtype)
        vt_true = NMatrix.new([n,n], [0,1,0,0,0, 0,0,1,0,0, Math.sqrt(0.2),0,0,0,Math.sqrt(0.8), 0,0,0,1,0, -Math.sqrt(0.8),0,0,0,Math.sqrt(0.2)], dtype: dtype)

        err = case dtype
                when :float32, :complex64
                  1e-5
                when :float64, :complex128
                  1e-14
              end

        expect(s).to be_within(err).of(s_true)
        expect(u).to be_within(err).of(u_true)
        expect(vt).to be_within(err).of(vt_true)
      end

      it "calculates the singular value decomposition with lapack_gesdd" do
        #example from Wikipedia
        m = 4
        n = 5
        mn_min = [m,n].min
        a = NMatrix.new([m,n],[1,0,0,0,2, 0,0,3,0,0, 0,0,0,0,0, 0,4,0,0,0], dtype: dtype)
        s = NMatrix.new([mn_min], 0, dtype: a.abs_dtype) #s is always real and always returned as float/double, never as complex
        u = NMatrix.new([m,m], 0, dtype: dtype)
        vt = NMatrix.new([n,n], 0, dtype: dtype)

        # This is a pure LAPACK function so it expects column-major functions
        # So we need to transpose the input as well as the output
        a = a.transpose
        NMatrix::LAPACK.lapack_gesdd(:a, m, n, a, m, s, u, m, vt, n, 500)
        u = u.transpose
        vt = vt.transpose

        s_true = NMatrix.new([mn_min], [4,3,Math.sqrt(5),0], dtype: a.abs_dtype)
        u_true = NMatrix.new([m,m], [0,0,1,0, 0,1,0,0, 0,0,0,-1, 1,0,0,0], dtype: dtype)
        vt_true = NMatrix.new([n,n], [0,1,0,0,0, 0,0,1,0,0, Math.sqrt(0.2),0,0,0,Math.sqrt(0.8), 0,0,0,1,0, -Math.sqrt(0.8),0,0,0,Math.sqrt(0.2)], dtype: dtype)

        err = case dtype
                when :float32, :complex64
                  1e-5
                when :float64, :complex128
                  1e-14
              end

        expect(s).to be_within(err).of(s_true)
        expect(u).to be_within(err).of(u_true)
        expect(vt).to be_within(err).of(vt_true)
      end

      it "exposes lapack_geev" do
        n = 3
        a = NMatrix.new([n,n], [-1,0,0, 0,1,-2, 0,1,-1], dtype: dtype)

        w = NMatrix.new([n], dtype: dtype)
        if a.complex_dtype? #for real dtypes, imaginary parts of eigenvalues are stored in separate vector
          wi = nil
        else
          wi = NMatrix.new([n], dtype: dtype)
        end
        vl = NMatrix.new([n,n], dtype: dtype)
        vr = NMatrix.new([n,n], dtype: dtype)

        # This is a pure LAPACK routine so it expects column-major matrices,
        # so we need to transpose everything.
        a = a.transpose
        NMatrix::LAPACK::lapack_geev(:left, :right, n, a, n, w, wi, vl, n, vr, n, 2*n)
        vr = vr.transpose
        vl = vl.transpose

        if !a.complex_dtype?
          # Recombine the separately-returned real and imaginary parts.
          w = w + wi*Complex(0,1)
        end

        w_true = NMatrix.new([n], [Complex(0,1), -Complex(0,1), -1], dtype: NMatrix.upcast(dtype, :complex64))

        if a.complex_dtype?
          #For complex types the right/left eigenvectors are stored as columns
          #of vr/vl.
          vr_true = NMatrix.new([n,n],[0,0,1, 2/Math.sqrt(6),2/Math.sqrt(6),0, Complex(1,-1)/Math.sqrt(6),Complex(1,1)/Math.sqrt(6),0], dtype: dtype)
          vl_true = NMatrix.new([n,n],[0,0,1, Complex(-1,1)/Math.sqrt(6),Complex(-1,-1)/Math.sqrt(6),0, 2/Math.sqrt(6),2/Math.sqrt(6),0], dtype: dtype)
        else
          #For real types, the real part of the first and second eigenvectors is
          #stored in the first column, the imaginary part of the first (= the
          #negative of the imaginary part of the second) eigenvector is stored
          #in the second column, and the third eigenvector (purely real) is the
          #third column.
          vr_true = NMatrix.new([n,n],[0,0,1, 2/Math.sqrt(6),0,0, 1/Math.sqrt(6),-1/Math.sqrt(6),0], dtype: dtype)
          vl_true = NMatrix.new([n,n],[0,0,1, -1/Math.sqrt(6),1/Math.sqrt(6),0, 2/Math.sqrt(6),0,0], dtype: dtype)
        end

        err = case dtype
                when :float32, :complex64
                  1e-6
                when :float64, :complex128
                  1e-15
              end

        expect(w).to be_within(err).of(w_true)
        expect(vr).to be_within(err).of(vr_true)
        expect(vl).to be_within(err).of(vl_true)
      end
    end
  end
end



================================================
FILE: spec/plugins/fftw/fftw_spec.rb
================================================
# = NMatrix
#
# A linear algebra library for scientific computation in Ruby.
# NMatrix is part of SciRuby.
#
# NMatrix was originally inspired by and derived from NArray, by
# Masahiro Tanaka: http://narray.rubyforge.org
#
# == Copyright Information
#
# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
#
# Please see LICENSE.txt for additional copyright notices.
# # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == fftw_spec.rb # # Tests for interfaces that are only exposed by nmatrix-fftw # require 'spec_helper' require "./lib/nmatrix/fftw" describe NMatrix do context "#fft" do before do @answer = NMatrix.new([10], [ Complex(330.3200,0.0000) , Complex(-8.4039 ,-150.3269), Complex(-99.4807,-68.6579) , Complex(-143.6861, -20.4273), Complex(67.6207 , 8.5236), Complex(130.7800 , 0.0000), Complex(67.6207 , -8.5236), Complex(-143.6861, 20.4273), Complex(-99.4807 , 68.6579), Complex(-8.4039 ,150.3269) ], dtype: :complex128) end it "computes an FFT of a complex NMatrix" do nm = NMatrix.new([10], [ Complex(9.32,0), Complex(44,0), Complex(125,0), Complex(34,0), Complex(31,0), Complex(44,0), Complex(12,0), Complex(1,0), Complex(53.23,0),Complex(-23.23,0)], dtype: :complex128) expect(nm.fft.round(4)).to eq(@answer) end end context "#fft2" do it "computes 2D FFT if NMatrix has such shape" do input = NMatrix.new([2,2], [ Complex(9.3200,0), Complex(43.0000,0), Complex(3.2000,0), Complex(4.0000,0) ], dtype: :complex128 ) output = NMatrix.new([2,2], [ Complex(59.520,0), Complex(-34.480,0), Complex(45.120,0), Complex(-32.880,0), ], dtype: :complex128 ) expect(input.fft2.round(4)).to eq(output) end end end describe NMatrix::FFTW, focus: true do describe NMatrix::FFTW::Plan do context ".new" do it "creates a new plan for default DFT (complex input/complex output)" do plan = NMatrix::FFTW::Plan.new(4) # TODO: Figure a way to test internal C data structures. 
expect(plan.shape) .to eq([4]) expect(plan.size) .to eq(4) expect(plan.dim) .to eq(1) expect(plan.flags) .to eq([:estimate]) expect(plan.direction).to eq(:forward) end it "creates a new plan for multi dimensional DFT with options" do plan = NMatrix::FFTW::Plan.new([10,5,8], direction: :backward, flags: [:exhaustive, :estimate], dim: 3) expect(plan.shape) .to eq([10,5,8]) expect(plan.size) .to eq(10*5*8) expect(plan.dim) .to eq(3) expect(plan.flags) .to eq([:exhaustive, :estimate]) expect(plan.direction).to eq(:backward) end it "creates a new plan for real input/complex output" do plan = NMatrix::FFTW::Plan.new([5,20,10,4,2], direction: :forward, flags: [:patient, :exhaustive], dim: 5, type: :real_complex) expect(plan.shape) .to eq([5,20,10,4,2]) expect(plan.size) .to eq(5*20*10*4*2) expect(plan.dim) .to eq(5) expect(plan.flags) .to eq([:patient, :exhaustive]) expect(plan.type) .to eq(:real_complex) end it "raises error for plan with incompatible shape and dimension" do expect { NMatrix::FFTW::Plan.new([9], dim: 2, type: :real_complex) }.to raise_error(ArgumentError) end it "creates a new plan for real input/real output" do plan = NMatrix::FFTW::Plan.new([30,30], type: :real_real, real_real_kind: [:rodft00, :redft10], dim: 2) expect(plan.shape).to eq([30,30]) expect(plan.size) .to eq(30*30) expect(plan.dim) .to eq(2) expect(plan.flags).to eq([:estimate]) expect(plan.type) .to eq(:real_real) end it "creates a new plan for complex input/real output" do plan = NMatrix::FFTW::Plan.new([30,400], type: :complex_real, dim: 2, flags: [:patient, :exhaustive]) expect(plan.shape).to eq([30,400]) expect(plan.size) .to eq(30*400) expect(plan.dim) .to eq(2) expect(plan.flags).to eq([:patient, :exhaustive]) expect(plan.type) .to eq(:complex_real) end end context "#set_input" do it "accepts nothing but complex128 input for the default or complex_real plan" do plan = NMatrix::FFTW::Plan.new(4) input = NMatrix.new([4], [23.54,52.34,52.345,64], dtype: :float64) expect { 
plan.set_input(input) }.to raise_error(ArgumentError) plan = NMatrix::FFTW::Plan.new(4, type: :complex_real) expect { plan.set_input input }.to raise_error(ArgumentError) end it "accepts nothing but float64 input for real_complex or real_real plan" do plan = NMatrix::FFTW::Plan.new(4, type: :real_complex) input = NMatrix.new([4], [1,2,3,4], dtype: :int32) expect { plan.set_input(input) }.to raise_error(ArgumentError) end end context "#execute" do it "calculates a basic 1D DFT" do input = NMatrix.new([10], [ Complex(9.32,0), Complex(44,0), Complex(125,0), Complex(34,0), Complex(31,0), Complex(44,0), Complex(12,0), Complex(1,0), Complex(53.23,0), Complex(-23.23,0), ], dtype: :complex128) output = NMatrix.new([10], [ Complex(330.3200,0.0000), Complex(-8.4039 ,-150.3269), Complex(-99.4807 , -68.6579), Complex(-143.6861, -20.4273), Complex(67.6207 , 8.5236), Complex(130.7800 , 0.0000), Complex(67.6207 , -8.5236), Complex(-143.6861, 20.4273), Complex(-99.4807 , 68.6579), Complex(-8.4039 ,150.3269) ], dtype: :complex128) plan = NMatrix::FFTW::Plan.new(10) plan.set_input input expect(plan.execute).to eq(true) expect(plan.output.round(4)).to eq(output) end it "calculates 2D DFT with options" do input = NMatrix.new([2,2], [ Complex(9.3200,0), Complex(43.0000,0), Complex(3.2000,0), Complex(4.0000,0) ], dtype: :complex128 ) output = NMatrix.new([2,2], [ Complex(59.520,0), Complex(-34.480,0), Complex(45.120,0), Complex(-32.880,0), ], dtype: :complex128 ) plan = NMatrix::FFTW::Plan.new([2,2], direction: :forward, flags: :estimate, dim: 2) plan.set_input input expect(plan.execute).to eq(true) expect(plan.output).to eq(output) end it "calculates ND DFT with options" do end it "calculates 1D real input/complex output DFT" do input = NMatrix.new([4], [3.10, 1.73, 1.04, 2.83], dtype: :float64) output = NMatrix.new([3], [Complex(8.70, 0), Complex(2.06, 1.1), Complex(-0.42, 0)], dtype: :complex128) plan = NMatrix::FFTW::Plan.new([4], type: :real_complex) plan.set_input input 
expect(plan.execute).to eq(true) expect(plan.output).to eq(output) end it "calculates 2D real input/complex output DFT" do input = NMatrix.new([16], [ 1 , 5,54 ,656, 4.3,1.32,-43.34,14 , 1 , 5, 54,656, 4.3,1.32,-43.34,14 ], dtype: :float64) output = NMatrix.new([9], [ Complex(1384.56, 0.0), Complex(-10.719999999999999, 1327.36), Complex(-1320.72, 0.0), Complex(0.0, 0.0), Complex(0.0, 0.0), Complex(0.0, 0.0), Complex(1479.44, 0.0), Complex(-201.28, 1276.64), Complex(-1103.28, 0.0) ], dtype: :complex128 ) plan = NMatrix::FFTW::Plan.new([4,4], type: :real_complex, dim: 2) plan.set_input input expect(plan.execute).to eq(true) expect(plan.output).to eq(output) end it "calculates 1D complex input/real output DFT" do input = NMatrix.new([8], [ Complex(9.32,0), Complex(43.0,0), Complex(3.20,0), Complex(4.00,0), Complex(5.32,0), Complex(3.20,0), Complex(4.00,0), Complex(5.32,0) ], dtype: :complex128) output = NMatrix.new([8], [ 115.04,59.1543,8.24,-51.1543,-72.96,-51.1543,8.24,59.1543 ], dtype: :float64) plan = NMatrix::FFTW::Plan.new([8], type: :complex_real) plan.set_input input expect(plan.execute).to eq(true) expect(plan.output.round(4)).to eq(output) end it "calculates 2D complex input/real output DFT" do input = NMatrix.new([9], [ Complex(9.32,0), Complex(43.0,0), Complex(3.20,0), Complex(4.00,0), Complex(5.32,0), Complex(3.20,0), Complex(4.00,0), Complex(5.32,0), Complex(45.32,0) ], dtype: :complex128) output = NMatrix.new([9], [ 118.24,-32.36,-32.36,83.86,-35.54,-33.14,83.86,-33.14,-35.54 ], dtype: :float64) plan = NMatrix::FFTW::Plan.new([3,3], type: :complex_real, dim: 2) plan.set_input input expect(plan.execute).to eq(true) expect(plan.output.round(2)) .to eq(output) end it "calculates basic 1D real input/real output DFT of kind RODFT00" do input = NMatrix.new([9], [9.32,43.00,3.20,4.00,5.32,3.20,4.00,5.32,45.32], dtype: :float64) output = NMatrix.new([9], [126.56,28.77,165.67,-24.76,105.52,-110.31,-1.23,-116.45,-14.44], dtype: :float64) plan = 
NMatrix::FFTW::Plan.new([9], type: :real_real, real_real_kind: [:rodft00]) plan.set_input input expect(plan.execute).to eq(true) expect(plan.output.round(2)).to eq(output) end it "calculates basic 1D real input/real output DFT of kind REDFT10" do input = NMatrix.new([9], [9.32,43.00,3.20,4.00,5.32,3.20,4.00,5.32,45.32], dtype: :float64) output = NMatrix.new([9], [245.36,-6.12,126.84,-62.35,35.00,-109.42,-38.24,-92.49,-21.20], dtype: :float64) plan = NMatrix::FFTW::Plan.new([9], type: :real_real, real_real_kind: [:redft10]) plan.set_input input expect(plan.execute).to eq(true) expect(plan.output.round(2)).to eq(output) end it "calculates 2D DFT for real input/real output of kind REDFT10, REDFT11" do input = NMatrix.new([9], [9.32,43.00,3.20,4.00,5.32,3.20,4.00,5.32,45.32], dtype: :float64) output = NMatrix.new([9], [272.181,-249.015,66.045,72.334,23.907,-228.463,85.368,-105.331,30.836], dtype: :float64) plan = NMatrix::FFTW::Plan.new([3,3], type: :real_real, real_real_kind: [:redft10, :redft11], dim: 2) plan.set_input input expect(plan.execute).to eq(true) expect(plan.output.round(3)) .to eq(output) end end end end ================================================ FILE: spec/plugins/lapacke/lapacke_spec.rb ================================================ # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. 
# # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == lapacke_spec.rb # # Tests for interfaces that are only exposed by nmatrix-lapacke # require 'spec_helper' require "./lib/nmatrix/lapacke" describe "NMatrix::LAPACK functions implemented with LAPACKE interface" do [:float32, :float64, :complex64, :complex128].each do |dtype| context dtype do it "exposes lapacke_getrf" do a = NMatrix.new([3,4], GETRF_EXAMPLE_ARRAY, dtype: dtype) ipiv = NMatrix::LAPACK.lapacke_getrf(:row, 3, 4, a, 4) b = NMatrix.new([3,4], GETRF_SOLUTION_ARRAY, dtype: dtype) ipiv_true = [2,3,3] # delta varies for different dtypes err = case dtype when :float32, :complex64 1e-6 when :float64, :complex128 1e-14 end expect(a).to be_within(err).of(b) expect(ipiv).to eq(ipiv_true) end it "exposes lapacke_getri" do a = NMatrix.new(:dense, 3, [1,0,4,1,1,6,-3,0,-10], dtype) ipiv = NMatrix::LAPACK::lapacke_getrf(:row, 3, 3, a, 3) # get pivot from getrf, use for getri # delta varies for different dtypes err = case dtype when :float32, :complex64 1e-5 when :float64, :complex128 1e-14 end NMatrix::LAPACK::lapacke_getri(:row, 3, a, 3, ipiv) b = NMatrix.new(:dense, 3, [-5,0,-2,-4,1,-1,1.5,0,0.5], dtype) expect(a).to be_within(err).of(b) end it "exposes lapacke_getrs with vector solutions" do a = NMatrix.new(3, [-2,4,-3,3,-2,1,0,-4,3], dtype: dtype) ipiv = NMatrix::LAPACK::lapacke_getrf(:row, 3, 3, a, 3) b = NMatrix.new([3,1], [-1, 17, -9], dtype: dtype) #be careful! the leading dimenension (lda,ldb) is the number of rows for row-major in LAPACKE. Different from CLAPACK convention! 
NMatrix::LAPACK::lapacke_getrs(:row, false, 3, 1, a, 3, ipiv, b, 1) # delta varies for different dtypes err = case dtype when :float32, :complex64 1e-5 when :float64, :complex128 1e-13 end expect(b[0]).to be_within(err).of(5) expect(b[1]).to be_within(err).of(-15.0/2) expect(b[2]).to be_within(err).of(-13) end it "exposes lapacke_getrs with matrix solutions" do a = NMatrix.new(3, [-2,4,-3,3,-2,1,0,-4,3], dtype: dtype) ipiv = NMatrix::LAPACK::lapacke_getrf(:row, 3, 3, a, 3) b = NMatrix.new([3,2], [-1, 2, 17, 10, -9, 1], dtype: dtype) #be careful! the leading dimenension (lda,ldb) is the number of rows for row-major in LAPACKE. Different from CLAPACK convention! NMatrix::LAPACK::lapacke_getrs(:row, false, 3, 2, a, 3, ipiv, b, 2) # delta varies for different dtypes err = case dtype when :float32, :complex64 1e-4 when :float64, :complex128 1e-13 end x = NMatrix.new([3,2], [5, -1.5, -7.5, -21.25, -13, -28], dtype: dtype) expect(b).to be_within(err).of(x) end it "exposes lapacke_potrf" do # first do upper begin a = NMatrix.new(:dense, 3, [25,15,-5, 0,18,0, 0,0,11], dtype) NMatrix::LAPACK::lapacke_potrf(:row, :upper, 3, a, 3) b = NMatrix.new(:dense, 3, [5,3,-1, 0,3,1, 0,0,3], dtype) expect(a).to eq(b) end # then do lower a = NMatrix.new(:dense, 3, [25,0,0, 15,18,0,-5,0,11], dtype) NMatrix::LAPACK::lapacke_potrf(:row, :lower, 3, a, 3) b = NMatrix.new(:dense, 3, [5,0,0, 3,3,0, -1,1,3], dtype) expect(a).to eq(b) end it "exposes lapacke_potri" do a = NMatrix.new(3, [4, 0,-1, 0, 2, 1, 0, 0, 1], dtype: dtype) NMatrix::LAPACK::lapacke_potrf(:row, :upper, 3, a, 3) NMatrix::LAPACK::lapacke_potri(:row, :upper, 3, a, 3) b = NMatrix.new(3, [0.5, -0.5, 1, 0, 1.5, -2, 0, 0, 4], dtype: dtype) err = case dtype when :float32, :complex64 1e-6 when :float64, :complex128 1e-14 end expect(a).to be_within(err).of(b) end it "exposes lapacke_potrs with vector solution" do a = NMatrix.new(3, [4, 0,-1, 0, 2, 1, 0, 0, 1], dtype: dtype) b = NMatrix.new([3,1], [3,0,2], dtype: dtype) 
NMatrix::LAPACK::lapacke_potrf(:row, :upper, 3, a, 3) #ldb is different from CLAPACK versions NMatrix::LAPACK::lapacke_potrs(:row, :upper, 3, 1, a, 3, b, 1) x = NMatrix.new([3,1], [3.5, -5.5, 11], dtype: dtype) err = case dtype when :float32, :complex64 1e-5 when :float64, :complex128 1e-14 end expect(b).to be_within(err).of(x) end it "exposes lapacke_potrs with matrix solution" do a = NMatrix.new(3, [4, 0,-1, 0, 2, 1, 0, 0, 1], dtype: dtype) b = NMatrix.new([3,2], [3,4, 0,4, 2,0], dtype: dtype) NMatrix::LAPACK::lapacke_potrf(:row, :upper, 3, a, 3) #ldb is different from CLAPACK versions NMatrix::LAPACK::lapacke_potrs(:row, :upper, 3, 2, a, 3, b, 2) x = NMatrix.new([3,2], [3.5, 0, -5.5, 4, 11, -4], dtype: dtype) err = case dtype when :float32, :complex64 1e-5 when :float64, :complex128 1e-14 end expect(b).to be_within(err).of(x) end it "calculates the singular value decomposition with lapacke_gesvd" do #example from Wikipedia m = 4 n = 5 mn_min = [m,n].min a = NMatrix.new([m,n],[1,0,0,0,2, 0,0,3,0,0, 0,0,0,0,0, 0,4,0,0,0], dtype: dtype) s = NMatrix.new([mn_min], 0, dtype: a.abs_dtype) #s is always real and always returned as float/double, never as complex u = NMatrix.new([m,m], 0, dtype: dtype) vt = NMatrix.new([n,n], 0, dtype: dtype) superb = NMatrix.new([mn_min-1], dtype: a.abs_dtype) NMatrix::LAPACK.lapacke_gesvd(:row, :a, :a, m, n, a, n, s, u, m, vt, n, superb) s_true = NMatrix.new([mn_min], [4,3,Math.sqrt(5),0], dtype: a.abs_dtype) u_true = NMatrix.new([m,m], [0,0,1,0, 0,1,0,0, 0,0,0,-1, 1,0,0,0], dtype: dtype) vt_true = NMatrix.new([n,n], [0,1,0,0,0, 0,0,1,0,0, Math.sqrt(0.2),0,0,0,Math.sqrt(0.8), 0,0,0,1,0, -Math.sqrt(0.8),0,0,0,Math.sqrt(0.2)], dtype: dtype) err = case dtype when :float32, :complex64 1e-5 when :float64, :complex128 1e-14 end expect(s).to be_within(err).of(s_true) expect(u).to be_within(err).of(u_true) expect(vt).to be_within(err).of(vt_true) end it "calculates the singular value decomposition with lapacke_gesdd" do #example from Wikipedia m 
= 4 n = 5 mn_min = [m,n].min a = NMatrix.new([m,n],[1,0,0,0,2, 0,0,3,0,0, 0,0,0,0,0, 0,4,0,0,0], dtype: dtype) s = NMatrix.new([mn_min], 0, dtype: a.abs_dtype) #s is always real and always returned as float/double, never as complex u = NMatrix.new([m,m], 0, dtype: dtype) vt = NMatrix.new([n,n], 0, dtype: dtype) NMatrix::LAPACK.lapacke_gesdd(:row, :a, m, n, a, n, s, u, m, vt, n) s_true = NMatrix.new([mn_min], [4,3,Math.sqrt(5),0], dtype: a.abs_dtype) u_true = NMatrix.new([m,m], [0,0,1,0, 0,1,0,0, 0,0,0,-1, 1,0,0,0], dtype: dtype) vt_true = NMatrix.new([n,n], [0,1,0,0,0, 0,0,1,0,0, Math.sqrt(0.2),0,0,0,Math.sqrt(0.8), 0,0,0,1,0, -Math.sqrt(0.8),0,0,0,Math.sqrt(0.2)], dtype: dtype) err = case dtype when :float32, :complex64 1e-5 when :float64, :complex128 1e-14 end expect(s).to be_within(err).of(s_true) expect(u).to be_within(err).of(u_true) expect(vt).to be_within(err).of(vt_true) end it "calculates eigenvalues and eigenvectors using lapacke_geev" do n = 3 a = NMatrix.new([n,n], [-1,0,0, 0,1,-2, 0,1,-1], dtype: dtype) w = NMatrix.new([n], dtype: dtype) if a.complex_dtype? #for real dtypes, imaginary parts of eigenvalues are stored in separate vector wi = nil else wi = NMatrix.new([n], dtype: dtype) end vl = NMatrix.new([n,n], dtype: dtype) vr = NMatrix.new([n,n], dtype: dtype) NMatrix::LAPACK.lapacke_geev(:row, :t, :t, n, a, n, w, wi, vl, n, vr, n) if !a.complex_dtype? w = w + wi*Complex(0,1) end w_true = NMatrix.new([n], [Complex(0,1), -Complex(0,1), -1], dtype: NMatrix.upcast(dtype, :complex64)) if a.complex_dtype? #For complex types the right/left eigenvectors are stored as columns #of vr/vl. 
vr_true = NMatrix.new([n,n],[0,0,1, 2/Math.sqrt(6),2/Math.sqrt(6),0, Complex(1,-1)/Math.sqrt(6),Complex(1,1)/Math.sqrt(6),0], dtype: dtype) vl_true = NMatrix.new([n,n],[0,0,1, Complex(-1,1)/Math.sqrt(6),Complex(-1,-1)/Math.sqrt(6),0, 2/Math.sqrt(6),2/Math.sqrt(6),0], dtype: dtype) else #For real types, the real part of the first and second eigenvectors is #stored in the first column, the imaginary part of the first (= the #negative of the imaginary part of the second) eigenvector is stored #in the second column, and the third eigenvector (purely real) is the #third column. vr_true = NMatrix.new([n,n],[0,0,1, 2/Math.sqrt(6),0,0, 1/Math.sqrt(6),-1/Math.sqrt(6),0], dtype: dtype) vl_true = NMatrix.new([n,n],[0,0,1, -1/Math.sqrt(6),1/Math.sqrt(6),0, 2/Math.sqrt(6),0,0], dtype: dtype) end err = case dtype when :float32, :complex64 1e-6 when :float64, :complex128 1e-15 end expect(w).to be_within(err).of(w_true) expect(vr).to be_within(err).of(vr_true) expect(vl).to be_within(err).of(vl_true) end it "exposes lapacke_geqrf" do a = NMatrix.new(3, [12.0, -51.0, 4.0, 6.0, 167.0, -68.0, -4.0, 24.0, -41.0] , dtype: dtype) b = NMatrix.new([3,1], 0, dtype: dtype) NMatrix::LAPACK::lapacke_geqrf(:row, a.shape[0], a.shape[1], a, a.shape[1], b) x = NMatrix.new([3,1], TAU_SOLUTION_ARRAY, dtype: dtype) y = NMatrix.new([3,3], GEQRF_SOLUTION_ARRAY, dtype: dtype) err = case dtype when :float32, :complex64 1e-4 when :float64, :complex128 1e-14 end expect(b).to be_within(err).of(x) expect(a).to be_within(err).of(y) end it "calculates QR decomposition in a compressed format using geqrf!" do a = NMatrix.new(3, [12.0, -51.0, 4.0, 6.0, 167.0, -68.0, -4.0, 24.0, -41.0] , dtype: dtype) tau = a.geqrf! 
x = NMatrix.new([3,1], TAU_SOLUTION_ARRAY, dtype: dtype) y = NMatrix.new([3,3], GEQRF_SOLUTION_ARRAY, dtype: dtype) err = case dtype when :float32, :complex64 1e-4 when :float64, :complex128 1e-14 end expect(tau).to be_within(err).of(x) expect(a).to be_within(err).of(y) end it "exposes lapacke_ormqr and lapacke_unmqr" do a = NMatrix.new([4,2], [34.0, 21.0, 23.0, 53.0, 26.0, 346.0, 23.0, 121.0] , dtype: dtype) tau = NMatrix.new([2,1], dtype: dtype) result = NMatrix.identity(4, dtype: dtype) # get tau from geqrf, use for ormqr NMatrix::LAPACK::lapacke_geqrf(:row, a.shape[0], a.shape[1], a, a.shape[1], tau) #Q is stored in result a.complex_dtype? ? NMatrix::LAPACK::lapacke_unmqr(:row, :left, false, result.shape[0], result.shape[1], tau.shape[0], a, a.shape[1], tau, result, result.shape[1]) : NMatrix::LAPACK::lapacke_ormqr(:row, :left, false, result.shape[0], result.shape[1], tau.shape[0], a, a.shape[1], tau, result, result.shape[1]) x = NMatrix.new([4,4], Q_SOLUTION_ARRAY_1, dtype: dtype) err = case dtype when :float32, :complex64 1e-4 when :float64, :complex128 1e-14 end expect(result).to be_within(err).of(x) end it "calculates the product of the orthogonal matrix with an arbitrary matrix" do a = N.new([2,2], [34.0, 21, 23, 53] , dtype: dtype) tau = NMatrix.new([2,1], dtype: dtype) #Result is the multiplicand that gets overriden : result = Q * result result = NMatrix.new([2,2], [2,0,0,2], dtype: dtype) # get tau from geqrf, use for ormqr NMatrix::LAPACK::lapacke_geqrf(:row, a.shape[0], a.shape[1], a, a.shape[1], tau) #Q is stored in result a.complex_dtype? ? 
NMatrix::LAPACK::lapacke_unmqr(:row, :left, false, result.shape[0], result.shape[1], tau.shape[0], a, a.shape[1], tau, result, result.shape[1]) : NMatrix::LAPACK::lapacke_ormqr(:row, :left, false, result.shape[0], result.shape[1], tau.shape[0], a, a.shape[1], tau, result, result.shape[1]) x = NMatrix.new([2,2], [-1.6565668262559257 , -1.1206187354084205, -1.1206187354084205 , 1.6565668262559263], dtype: dtype) err = case dtype when :float32, :complex64 1e-4 when :float64, :complex128 1e-14 end expect(result).to be_within(err).of(x) end it "calculates the orthogonal matrix Q using ormqr/unmqr after geqrf!" do a = NMatrix.new([4,2], [34.0, 21.0, 23.0, 53.0, 26.0, 346.0, 23.0, 121.0] , dtype: dtype) # get tau from geqrf, use for ormqr tau = a.geqrf! #Q is stored in result result = a.complex_dtype? ? a.unmqr(tau) : a.ormqr(tau) x = NMatrix.new([4,4], Q_SOLUTION_ARRAY_1, dtype: dtype) err = case dtype when :float32, :complex64 1e-4 when :float64, :complex128 1e-14 end expect(result).to be_within(err).of(x) end end it "calculates the transpose of Q using ormqr/unmqr after geqrf!" do a = NMatrix.new([4,2], [34.0, 21.0, 23.0, 53.0, 26.0, 346.0, 23.0, 121.0] , dtype: dtype) # get tau from geqrf, use for ormqr tau = a.geqrf! #Q is stored in result result = a.complex_dtype? ? a.unmqr(tau, :left, :complex_conjugate) : a.ormqr(tau, :left, :transpose) x = NMatrix.new([4,4], Q_SOLUTION_ARRAY_1, dtype: dtype) x = x.transpose err = case dtype when :float32, :complex64 1e-4 when :float64, :complex128 1e-14 end expect(result).to be_within(err).of(x) end it "calculates the multiplication c * Q using ormqr/unmqr after geqrf!" do a = NMatrix.new(3, [12.0, -51.0, 4.0, 6.0, 167.0, -68.0, -4.0, 24.0, -41.0] , dtype: dtype) # get tau from geqrf, use for ormqr tau = a.geqrf! c = NMatrix.new([2,3], [1,0,1,0,0,1], dtype: dtype) #Q is stored in result result = a.complex_dtype? ? 
a.unmqr(tau, :right, false, c) : a.ormqr(tau, :right, false, c) solution = NMatrix.new([2,3], [-0.5714285714285714, 0.2228571428571429, 1.2742857142857142, 0.28571428571428575, -0.1714285714285714, 0.9428571428571428] , dtype: dtype) err = case dtype when :float32, :complex64 1e-4 when :float64, :complex128 1e-14 end expect(result).to be_within(err).of(solution) end end end ================================================ FILE: spec/rspec_monkeys.rb ================================================ # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == rspec_monkeys.rb # # A set of monkey patches for RSpec allowing checks of NMatrix types # module RSpec::Matchers::BuiltIn class BeWithin def of(expected) @expected = expected @unit = '' if expected.is_a?(NMatrix) @tolerance = if @delta.is_a?(NMatrix) @delta.clone elsif @delta.is_a?(Array) NMatrix.new(:dense, expected.shape, @delta, expected.dtype) else NMatrix.ones_like(expected) * @delta end else @tolerance = @delta end self end def percent_of(expected) @expected = expected @unit = '%' @tolerance = @expected.abs * @delta / 100.0 # <- only change is to reverse abs and @delta self end end end ================================================ FILE: spec/rspec_spec.rb ================================================ # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. 
# # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == rspec_spec.rb # # A spec for testing monkey patches to RSpec for NMatrix. # require 'spec_helper' describe "RSpec" do it "should permit #be_within to be used on a dense NMatrix" do pending("not yet implemented for NMatrix-JRuby") if jruby? expect(NMatrix.new([4,1], 1.0, dtype: :complex128, stype: :dense) / 10000.0).to be_within(0.00000001).of(NMatrix.new([4,1], 0.0001, dtype: :float64, stype: :dense)) expect(NMatrix.new([4,1], 1.0, dtype: :complex128, stype: :dense) / 10000.0).not_to be_within(0.00000001).of(NMatrix.new([4,1], 1.0, dtype: :float64, stype: :dense)) end end ================================================ FILE: spec/shortcuts_spec.rb ================================================ # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == shortcuts_spec.rb # # Specs for the shortcuts used in NMatrix and in NVector. 
# require 'spec_helper' require 'pry' describe NMatrix do it "zeros() creates a matrix of zeros" do m = NMatrix.zeros(3) n = NMatrix.new([3, 3], 0) expect(m).to eq n end it "ones() creates a matrix of ones" do m = NMatrix.ones(3) n = NMatrix.new([3, 3], 1) expect(m).to eq n end it "eye() creates an identity matrix" do m = NMatrix.eye(3) identity3 = NMatrix.new([3, 3], [1, 0, 0, 0, 1, 0, 0, 0, 1]) expect(m).to eq identity3 end it "hilbert() creates an hilbert matrix" do m = NMatrix.hilbert(8) expect(m[4, 0]).to be_within(0.000001).of(0.2) expect(m[4, 1]).to be_within(0.000001).of(0.16666666666666666) expect(m[4, 2]).to be_within(0.000001).of(0.14285714285714285) expect(m[4, 3]).to be_within(0.000001).of(0.125) m = NMatrix.hilbert(3) hilbert3 = NMatrix.new([3, 3], [1.0, 0.5, 0.3333333333333333,\ 0.5, 0.3333333333333333, 0.25, 0.3333333333333333, 0.25, 0.2]) expect(m).to eq hilbert3 0.upto(2) do |i| 0.upto(2) do |j| expect(m[i, j]).to be_within(0.000001).of(hilbert3[i,j]) end end end it "inv_hilbert() creates an inverse hilbert matrix" do m = NMatrix.inv_hilbert(6) inv_hilbert6 = [3360.0, -88200.0, 564480.0, -1411200.0] expect(m[2,0]).to be_within(0.000001).of(inv_hilbert6[0]) expect(m[2,1]).to be_within(0.000001).of(inv_hilbert6[1]) expect(m[2,2]).to be_within(0.000001).of(inv_hilbert6[2]) expect(m[2,3]).to be_within(0.000001).of(inv_hilbert6[3]) m = NMatrix.inv_hilbert(3) inv_hilbert3 = NMatrix.new([3, 3], [ 9.0, -36.0, 30.0, -36.0, 192.0, -180.0, 30.0, -180.0, 180.0] ) 0.upto(2) do |i| 0.upto(2) do |j| expect(m[i, j]).to be_within(0.000001).of(inv_hilbert3[i,j]) end end end it "diag() creates a matrix with pre-supplied diagonal" do arr = [1,2,3,4] m = NMatrix.diag(arr) expect(m.is_a?(NMatrix)).to be true end it "diagonals() contains the seeded values on the diagonal" do arr = [1,2,3,4] m = NMatrix.diagonals(arr) expect(m[0,0]).to eq(arr[0]) expect(m[1,1]).to eq(arr[1]) expect(m[2,2]).to eq(arr[2]) expect(m[3,3]).to eq(arr[3]) end ALL_DTYPES.each do |dtype| [:dense, 
:yale, :list].each do |stype| context "#block_diagonal #{dtype} #{stype}" do it "block_diagonal() creates a block-diagonal NMatrix" do pending("not yet implemented for NMatrix-JRuby") if jruby? and dtype == :object a = NMatrix.new([2,2], [1,2, 3,4]) b = NMatrix.new([1,1], [123.0]) c = NMatrix.new([3,3], [1,2,3, 1,2,3, 1,2,3]) d = Array[ [1,1,1], [2,2,2], [3,3,3] ] e = 12 m = NMatrix.block_diagonal(a, b, c, d, e, dtype: dtype, stype: stype) expect(m).to eq(NMatrix.new([10,10], [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12], dtype: dtype, stype: stype)) end end end end context "::random" do it "creates a matrix of random numbers" do m = NMatrix.random(2) expect(m.stype).to eq(:dense) expect(m.dtype).to eq(:float64) end it "creates a matrix of random numbers with defined seed value" do m1 = NMatrix.random(2,:seed => 62) m2 = NMatrix.random(2,:seed => 62) m3 = NMatrix.random(2,:seed => 65) expect(m1).to eq(m2) expect(m1).not_to eq(m3) end it "creates a complex matrix of random numbers" do m = NMatrix.random(2, :dtype => :complex128) end it "correctly accepts :scale parameter" do m = NMatrix.random([2,2], dtype: :byte, scale: 255) m.each do |v| expect(v).to be >= 0 expect(v).to be < 255 end end it "only accepts an integer or an array as dimension" do m = NMatrix.random([2, 2]) expect(m.stype).to eq(:dense) expect(m.dtype).to eq(:float64) expect { NMatrix.random(2.0) }.to raise_error expect { NMatrix.random("not an array or integer") }.to raise_error end end context "::magic" do ALL_DTYPES.each do |dtype| context dtype do it "creates a matrix with numbers from 1 to n^n(n squared)" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
a = NMatrix.magic(3, dtype: dtype) magic3 = NMatrix.new([3,3], [4, 9, 2, 3, 5, 7, 8, 1, 6], dtype: dtype) expect(a).to eq magic3 b = NMatrix.magic(4, dtype: dtype) magic4 = NMatrix.new([4,4], [1, 15, 14, 4, 12, 6, 7, 9, 8, 10, 11, 5, 13, 3, 2, 16], dtype: dtype) expect(b).to eq magic4 c = NMatrix.magic(6, dtype: dtype) magic6 = NMatrix.new([6,6], [31, 9, 2, 22, 27, 20, 3, 32, 7, 21, 23, 25, 35, 1, 6, 26, 19, 24, 4, 36, 29, 13, 18, 11, 30, 5, 34, 12, 14, 16, 8, 28, 33, 17, 10, 15], dtype: dtype) expect(c).to eq magic6 end end end it "shape of two is not allowed" do expect { NMatrix.magic(2) }.to raise_error(ArgumentError) end it "Only accepts an integer as dimension" do expect { NMatrix.magic(3.0) }.to raise_error(ArgumentError) end end context "::linspace" do it "creates a row vector when given only one shape parameter" do pending("not yet implemented for NMatrix-JRuby") if jruby? v = NMatrix.linspace(1, 10, 4) #Expect a row vector only expect(v.shape.length).to eq(1) ans = [1.0,4.0,7.0,10.0] expect(v[0]).to be_within(0.000001).of(ans[0]) expect(v[1]).to be_within(0.000001).of(ans[1]) expect(v[2]).to be_within(0.000001).of(ans[2]) expect(v[3]).to be_within(0.000001).of(ans[3]) end it "creates a matrix of input shape with each entry linearly spaced in row major order" do pending("not yet implemented for NMatrix-JRuby") if jruby? v = NMatrix.linspace(1, Math::PI, [2,2]) expect(v.dtype).to eq(:float64) ans = [1.0, 1.7138642072677612, 2.4277284145355225, 3.1415927410125732] expect(v[0,0]).to be_within(0.000001).of(ans[0]) expect(v[0,1]).to be_within(0.000001).of(ans[1]) expect(v[1,0]).to be_within(0.000001).of(ans[2]) expect(v[1,1]).to be_within(0.000001).of(ans[3]) end end context "::logspace" do it "creates a logarithmically spaced vector" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
v = NMatrix.logspace(1, 2, 10) expect(v.shape.length).to eq(1) #Unit test taken from Matlab R2015b output of logspace(1,2,10) ans = [10.0000, 12.9155, 16.6810, 21.5443, 27.8256, 35.9381, 46.4159, 59.9484, 77.4264, 100.0000] expect(v[0].round(4)).to be_within(0.000001).of(ans[0]) expect(v[1].round(4)).to be_within(0.000001).of(ans[1]) expect(v[2].round(4)).to be_within(0.000001).of(ans[2]) expect(v[3].round(4)).to be_within(0.000001).of(ans[3]) expect(v[4].round(4)).to be_within(0.000001).of(ans[4]) expect(v[5].round(4)).to be_within(0.000001).of(ans[5]) expect(v[6].round(4)).to be_within(0.000001).of(ans[6]) expect(v[7].round(4)).to be_within(0.000001).of(ans[7]) expect(v[8].round(4)).to be_within(0.000001).of(ans[8]) expect(v[9].round(4)).to be_within(0.000001).of(ans[9]) end it "creates a logarithmically spaced vector bounded by Math::PI if :pi is pre-supplied" do pending("not yet implemented for NMatrix-JRuby") if jruby? v = NMatrix.logspace(1, :pi, 7) #Unit test taken from Matlab R2015b output of logspace(1,pi,10) ans = [10.0000, 8.2450, 6.7980, 5.6050, 4.6213, 3.8103, 3.1416] expect(v[0].round(4)).to be_within(0.000001).of(ans[0]) expect(v[1].round(4)).to be_within(0.000001).of(ans[1]) expect(v[2].round(4)).to be_within(0.000001).of(ans[2]) expect(v[3].round(4)).to be_within(0.000001).of(ans[3]) expect(v[4].round(4)).to be_within(0.000001).of(ans[4]) expect(v[5].round(4)).to be_within(0.000001).of(ans[5]) expect(v[6].round(4)).to be_within(0.000001).of(ans[6]) end it "creates a matrix of input shape with each entry logarithmically spaced in row major order" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
v = NMatrix.logspace(1, 2, [3,2]) ans = [10.0, 15.8489, 25.1189, 39.8107, 63.0957, 100.0] expect(v[0,0].round(4)).to be_within(0.000001).of(ans[0]) expect(v[0,1].round(4)).to be_within(0.000001).of(ans[1]) expect(v[1,0].round(4)).to be_within(0.000001).of(ans[2]) expect(v[1,1].round(4)).to be_within(0.000001).of(ans[3]) expect(v[2,0].round(4)).to be_within(0.000001).of(ans[4]) expect(v[2,1].round(4)).to be_within(0.000001).of(ans[5]) end end it "seq() creates a matrix of integers, sequentially" do m = NMatrix.seq(2) # 2x2 matrix. value = 0 2.times do |i| 2.times do |j| expect(m[i,j]).to eq(value) value += 1 end end end it "indgen() creates a matrix of integers as well as seq()" do m = NMatrix.indgen(2) # 2x2 matrix. value = 0 2.times do |i| 2.times do |j| expect(m[i, j]).to eq(value) value += 1 end end end it "findgen creates a matrix of floats, sequentially" do m = NMatrix.findgen(2) # 2x2 matrix. value = 0 2.times do |i| 2.times do |j| expect(m[i, j]/10).to be_within(Float::EPSILON).of(value.to_f/10) value += 1 end end end it "bindgen() creates a matrix of bytes" do m = NMatrix.bindgen(2) # 2x2 matrix. value = 0 2.times do |i| 2.times do |j| expect(m[i, j]).to eq(value) value += 1 end end end it "cindgen() creates a matrix of complexes" do m = NMatrix.cindgen(2) # 2x2 matrix. 
value = 0 2.times do |i| 2.times do |j| expect(m[i, j].real).to be_within(Float::EPSILON).of(value) expect(m[i, j].imag).to be_within(Float::EPSILON).of(0.0) value += 1 end end end it "column() returns a NMatrix" do m = NMatrix.random(3) expect(m.column(2).is_a?(NMatrix)).to be true end it "row() returns a NMatrix" do m = NMatrix.random(3) expect(m.row(2).is_a?(NMatrix)).to be true end it "diagonals() creates an NMatrix" do arr = [1,2,3,4] m = NMatrix.diagonals(arr) expect(m.is_a?(NMatrix)).to be true end it "diagonals() contains the seeded values on the diagonal" do arr = [1,2,3,4] m = NMatrix.diagonals(arr) expect(m[0,0]).to eq(arr[0]) expect(m[1,1]).to eq(arr[1]) expect(m[2,2]).to eq(arr[2]) expect(m[3,3]).to eq(arr[3]) end context "_like constructors" do before :each do @nm_1d = NMatrix[5.0,0.0,1.0,2.0,3.0] @nm_2d = NMatrix[[0.0,1.0],[2.0,3.0]] end it "should create an nmatrix of ones with dimensions and type the same as its argument" do pending("not yet implemented for NMatrix-JRuby") if jruby? expect(NMatrix.ones_like(@nm_1d)).to eq NMatrix[1.0, 1.0, 1.0, 1.0, 1.0] expect(NMatrix.ones_like(@nm_2d)).to eq NMatrix[[1.0, 1.0], [1.0, 1.0]] end it "should create an nmatrix of zeros with dimensions and type the same as its argument" do expect(NMatrix.zeros_like(@nm_1d)).to eq NMatrix[0.0, 0.0, 0.0, 0.0, 0.0] expect(NMatrix.zeros_like(@nm_2d)).to eq NMatrix[[0.0, 0.0], [0.0, 0.0]] end end end describe "NVector" do it "zeros() creates a vector of zeros" do v = NVector.zeros(4) 4.times do |i| expect(v[i]).to eq(0) end end it "ones() creates a vector of ones" do pending("not yet implemented for NMatrix-JRuby") if jruby? v = NVector.ones(3) 3.times do |i| expect(v[i]).to eq(1) end end it "random() creates a vector of random numbers" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
v = NVector.random(4) expect(v.dtype).to eq(:float64) expect(v.stype).to eq(:dense) end it "seq() creates a vector of integers, sequentially" do v = NVector.seq(7) expect(v).to eq(NMatrix.new([7,1], [0, 1, 2, 3, 4, 5, 6])) end it "seq() only accepts integers as dimension" do expect { NVector.seq(3) }.to_not raise_error expect { NVector.seq([1, 3]) }.to raise_error expect { NVector.seq(:wtf) }.to raise_error end it "indgen() creates a vector of integers as well as seq()" do v = NVector.indgen(7) expect(v).to eq(NMatrix.new([7,1], [0, 1, 2, 3, 4, 5, 6])) end it "findgen creates a vector of floats, sequentially" do v = NVector.findgen(2) expect(v).to eq(NMatrix.new([2,1], [0.0, 1.0])) end it "bindgen() creates a vector of bytes, sequentially" do v = NVector.bindgen(4) expect(v).to eq(NMatrix.new([4,1], [0, 1, 2, 3], dtype: :byte)) end it "cindgen() creates a vector of complexes, sequentially" do pending("not yet implemented for NMatrix-JRuby") if jruby? v = NVector.cindgen(2) expect(v).to eq(NMatrix.new([2,1], [Complex(0.0, 0.0), Complex(1.0, 0.0)], dtype: :complex64)) end it "linspace() creates a vector with n values equally spaced between a and b" do pending("not yet implemented for NMatrix-JRuby") if jruby? v = NVector.linspace(0, 2, 5) expect(v).to eq(NMatrix.new([5,1], [0.0, 0.5, 1.0, 1.5, 2.0])) end it "logspace() creates a vector with n values logarithmically spaced between decades 10^a and 10^b" do pending("not yet implemented for NMatrix-JRuby") if jruby? v = NVector.logspace(0, 3, 4) expect(v).to eq(NMatrix.new([4,1], [1.0, 10.0, 100.0, 1000.0])) end end describe "Inline constructor" do it "creates a NMatrix with the given values" do m = NMatrix.new([2, 2], [1, 4, 6, 7]) n = NMatrix[[1, 4], [6, 7]] expect(m).to eq n end end ================================================ FILE: spec/slice_set_spec.rb ================================================ # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. 
# # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == slice_set_spec.rb # # Test of slice set operations. require 'spec_helper' require 'pry' describe "Set slice operation" do include RSpec::Longrun::DSL [:dense, :yale, :list].each do |stype| context "for #{stype}" do before :each do @m = create_matrix(stype) end example "set and unset a range of entries with single values" do if stype == :yale step "verify correct arrangement of Yale IJA and A arrays" do @m.extend NMatrix::YaleFunctions unless jruby? if jruby? 
pending("not yet implemented for NMatrix-JRuby") else expect(@m.yale_ija).to eq([4,6,8,10,1,2,0,2,0,1]) end expect(@m.yale_a).to eq([0,4,8,0, 1,2,3,5,6,7]) end end step "set and reset a single entry" do n = @m.clone old_val = @m[0,0] @m[0,0] = 100 expect(@m[0,0]).to eq(100) @m[0,0] = old_val expect(@m).to eq(n) end if stype == :yale n = @m.clone step "set a row of entries" do n[0,0..2] = 0 expect(n[0,0..2].to_flat_array).to eq([0,0,0]) expect(n[1,0..2].to_flat_array).to eq([3,4,5]) expect(n[2,0..2].to_flat_array).to eq([6,7,8]) end step "set a second row of entries" do n[2,0..2] = 0 expect(n[2,0..2].to_flat_array).to eq([0,0,0]) expect(n[1,0..2].to_flat_array).to eq([3,4,5]) end step "reset both rows of entries" do n[0,0..2] = [0,1,2] n[2,0..2] = [6,7,8] expect(n).to eq(@m) end end slice_result_a = NMatrix.new(:dense, 2, 100, @m.dtype).cast(stype) slice_result_b = NMatrix.new(:dense, 2, 0, @m.dtype).cast(stype) m = @m.clone step "set upper left-hand 2x2 corner to 100" do m[0..1,0..1] = 100 if stype == :yale expect(m.yale_ija).to eq([4, 6, 8, 10, 1, 2, 0, 2, 0, 1]) expect(m.yale_a).to eq([100, 100, 8, 0, 100, 2, 100, 5, 6, 7]) end expect(m[0..1,0..1]).to eq(slice_result_a) expect(m[2,0..1]).to eq(@m[2,0..1]) expect(m[0..1,2]).to eq(@m[0..1,2]) end step "set upper left-hand 2x2 corner to 0" do m[0..1,0..1] = 0 if stype == :yale expect([4,5,6,8,2,2,0,1]).to eq(m.yale_ija) expect([0,0,8,0,2,5,6,7]).to eq(m.yale_a) end expect(m[0..1,0..1]).to eq(slice_result_b) end m = @m.clone step "set lower left-hand 2x2 corner to 100" do m[1..2,0..1] = 100 expect(m[1..2,0..1]).to eq(slice_result_a) expect(m[0,0..1]).to eq(@m[0,0..1]) expect(m[1..2,2]).to eq(@m[1..2,2]) end step "set lower left-hand 2x2 corner to 0" do m[1..2,0..1] = 0 expect(m[1..2,0..1]).to eq(slice_result_b) end m = @m.clone step "set lower right-hand 2x2 corner to 100" do m[1..2,1..2] = 100 expect(m[1..2,1..2]).to eq(slice_result_a) expect(m[0,1..2]).to eq(@m[0,1..2]) expect(m[1..2,0]).to eq(@m[1..2,0]) end step 
"set lower right-hand 2x2 corner to 0" do m[1..2,1..2] = 0 expect(m[1..2,1..2]).to eq(slice_result_b) end m = @m.clone step "set upper right-hand 2x2 corner to 100" do m[0..1,1..2] = 100 expect(m[0..1,1..2]).to eq(slice_result_a) expect(m[2,1..2]).to eq(@m[2,1..2]) expect(m[0..1,0]).to eq(@m[0..1,0]) end step "set upper right-hand 2x2 corner to 0" do m[0..1,1..2] = 0 expect(m[0..1,1..2]).to eq(slice_result_b) end end example "set a range of values to a matrix's contents" do pending("not yet implemented for int dtype for NMatrix-JRuby") if jruby? x = NMatrix.new(4, stype: :yale, dtype: :int16) x.extend NMatrix::YaleFunctions if stype == :yale x[1..3,1..3] = @m expect(x.to_flat_array).to eq([0,0,0,0, 0,0,1,2, 0,3,4,5, 0,6,7,8]) end end end end ================================================ FILE: spec/spec_helper.rb ================================================ # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. # # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == spec_helper.rb # # Common data and helper functions for testing. 
require "rspec/longrun" #require "narray/narray" require "./lib/nmatrix" require "./lib/nmatrix/rspec" ALL_DTYPES = [:byte,:int8,:int16,:int32,:int64, :float32,:float64, :object, :complex64, :complex128] NON_INTEGER_DTYPES = [:float32, :float64, :complex64, :complex128, :object] FLOAT_DTYPES = [:float32, :float64] MATRIX43A_ARRAY = [14.0, 9.0, 3.0, 2.0, 11.0, 15.0, 0.0, 12.0, 17.0, 5.0, 2.0, 3.0] MATRIX32A_ARRAY = [12.0, 25.0, 9.0, 10.0, 8.0, 5.0] COMPLEX_MATRIX43A_ARRAY = MATRIX43A_ARRAY.zip(MATRIX43A_ARRAY.reverse).collect { |ary| Complex(ary[0], ary[1]) } COMPLEX_MATRIX32A_ARRAY = MATRIX32A_ARRAY.zip(MATRIX32A_ARRAY.reverse).collect { |ary| Complex(ary[0], -ary[1]) } #3x4 matrix used for testing various getrf and LU decomposition functions GETRF_EXAMPLE_ARRAY = [-1,0,10,4,9,2,3,5,7,8,1,6] GETRF_SOLUTION_ARRAY = [9.0, 2.0, 3.0, 5.0, 7.0/9, 58.0/9, -4.0/3, 19.0/9, -1.0/9, 1.0/29, 301.0/29, 130.0/29] TAU_SOLUTION_ARRAY = [1.8571428571428572,1.9938461538461538, 0.0] GEQRF_SOLUTION_ARRAY =[ -14.0, -21.0, 14.000000000000002, 0.23076923076923078, -175.00000000000003, 70.00000000000001, -0.15384615384615385, 0.055555555555555546, -35.0] R_SOLUTION_ARRAY = [-159.2388143638353, -41.00131005172065, -56.75123892439876, -90.75048729628048, 0.0, 25.137473501580676, 2.073591725046292, 9.790607357775713, 0.0, 0.0, -20.83259700334131, -17.592414929551445] Q_SOLUTION_ARRAY_1 = [-0.632455532033676, -0.5209522876558295, -0.3984263084135902, -0.41214704991068, -0.42783756578748666, -0.20837937347171134, 0.876505919951498, 0.07259770177184455, -0.48364246567281094, 0.8265854747306287,-0.015758658987033422, -0.2873988222474053, -0.42783756578748666, 0.044081783789183565, -0.26971376257215296, 0.8615487797670971] Q_SOLUTION_ARRAY_2 = [-0.8571428571428572, 0.3942857142857143, 0.33142857142857146, -0.4285714285714286, -0.9028571428571428, -0.03428571428571425, 0.28571428571428575, -0.1714285714285714, 0.9428571428571428] Q_SOLUTION_ARRAY_3 = [-0.7724247413634004, -0.026670393594597247, 
-0.6345460653374136, -0.5777485870360393, -0.38541856437557026, 0.7194853024298236, -0.26375478973384403, 0.9223563413020934, 0.28229805268947933] def create_matrix(stype) #:nodoc: m = NMatrix.new([3,3], 0, dtype: :int32, stype: stype, default: 0) m[0,0] = 0 m[0,1] = 1 m[0,2] = 2 m[1,0] = 3 m[1,1] = 4 m[1,2] = 5 m[2,0] = 6 m[2,1] = 7 m[2,2] = 8 m end def create_rectangular_matrix(stype) #:nodoc: m = NMatrix.new([5,6], 0, dtype: :int32, stype: stype, default: 0) m[0,0] = 1 m[0,1] = 2 m[0,2] = 3 m[0,3] = 4 m[0,4] = 5 m[0,5] = 0 m[1,0] = 6 m[1,1] = 7 m[1,2] = 8 m[1,3] = 9 m[1,4] = 0 m[1,5] = 10 m[2,0] = 11 m[2,1] = 12 m[2,2] = 13 m[2,3] = 0 m[2,4] = 14 m[2,5] = 15 # skip row 3 -- all 0 m[3,0] = m[3,1] = m[3,2] = m[3,3] = m[3,4] = m[3,5] = 0 m[4,0] = 16 m[4,1] = 0 m[4,2] = 17 m[4,3] = 18 m[4,4] = 19 m[4,5] = 20 m end def create_vector(stype) #:nodoc: m = stype == :yale ? NVector.new(stype, 10, :int32) : NVector.new(stype, 10, 0, :int32) m[0] = 1 m[1] = 2 m[2] = 3 m[3] = 4 m[4] = 5 m[5] = 6 m[6] = 7 m[7] = 8 m[8] = 9 m[9] = 10 m end # Stupid but independent comparison for slice_spec def nm_eql(n, m) #:nodoc: if n.shape != m.shape false else # NMatrix n.shape[0].times do |i| n.shape[1].times do |j| if n[i,j] != m[i,j] puts "n[#{i},#{j}] != m[#{i},#{j}] (#{n[i,j]} != #{m[i,j]})" return false end end end end true end def integer_dtype? dtype [:byte,:int8,:int16,:int32,:int64].include?(dtype) end # If a focus: true option is supplied to any test, running `rake spec focus=true` # will run only the focused tests and nothing else. if ENV["focus"] == "true" RSpec.configure do |c| c.filter_run :focus => true end end ================================================ FILE: spec/stat_spec.rb ================================================ # = NMatrix # # A linear algebra library for scientific computation in Ruby. # NMatrix is part of SciRuby. 
# # NMatrix was originally inspired by and derived from NArray, by # Masahiro Tanaka: http://narray.rubyforge.org # # == Copyright Information # # SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation # NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation # # Please see LICENSE.txt for additional copyright notices. # # == Contributing # # By contributing source code to SciRuby, you agree to be bound by # our Contributor Agreement: # # * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement # # == stat_spec.rb # # Tests for statistical functions in NMatrix. # require 'spec_helper' require 'pry' describe "Statistical functions" do context "mapping and reduction related functions" do [:dense, :yale, :list].each do |stype| context "on #{stype} matrices" do let(:nm_1d) { NMatrix.new([5], [5.0,0.0,1.0,2.0,3.0], stype: stype) unless stype == :yale } let(:nm_2d) { NMatrix.new([2,2], [0.0, 1.0, 2.0, 3.0], stype: stype) } it "behaves like Enumerable#reduce with no argument to reduce" do expect(nm_1d.reduce_along_dim(0) { |acc, el| acc + el }.to_f).to eq 11 unless stype == :yale expect(nm_2d.reduce_along_dim(1) { |acc, el| acc + el }).to eq NMatrix.new([2,1], [1.0, 5.0], stype: stype) end it "should calculate the mean along the specified dimension" do pending("not yet implemented for NMatrix-JRuby") if jruby? unless stype == :yale then puts nm_1d.mean expect(nm_1d.mean).to eq NMatrix.new([1], [2.2], stype: stype, dtype: :float64) end expect(nm_2d.mean).to eq NMatrix[[1.0,2.0], stype: stype] expect(nm_2d.mean(1)).to eq NMatrix[[0.5], [2.5], stype: stype] end it "should calculate the minimum along the specified dimension" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
expect(nm_1d.min).to eq 0.0 unless stype == :yale expect(nm_2d.min).to eq NMatrix[[0.0, 1.0], stype: stype] expect(nm_2d.min(1)).to eq NMatrix[[0.0], [2.0], stype: stype] end it "should calculate the maximum along the specified dimension" do pending("not yet implemented for NMatrix-JRuby") if jruby? expect(nm_1d.max).to eq 5.0 unless stype == :yale expect(nm_2d.max).to eq NMatrix[[2.0, 3.0], stype: stype] end it "should calculate the variance along the specified dimension" do pending("not yet implemented for NMatrix-JRuby") if jruby? expect(nm_1d.variance).to eq NMatrix[3.7, stype: stype] unless stype == :yale expect(nm_2d.variance(1)).to eq NMatrix[[0.5], [0.5], stype: stype] end it "should calculate the sum along the specified dimension" do pending("not yet implemented for NMatrix-JRuby") if jruby? expect(nm_1d.sum).to eq NMatrix[11.0, stype: stype] unless stype == :yale expect(nm_2d.sum).to eq NMatrix[[2.0, 4.0], stype: stype] end it "should calculate the standard deviation along the specified dimension" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
expect(nm_1d.std).to eq NMatrix[Math.sqrt(3.7), stype: stype] unless stype == :yale expect(nm_2d.std(1)).to eq NMatrix[[Math.sqrt(0.5)], [Math.sqrt(0.5)], stype: stype] end it "should raise an ArgumentError when any invalid dimension is provided" do expect { nm_1d.mean(3) }.to raise_exception(RangeError) unless stype == :yale expect { nm_2d.mean(3) }.to raise_exception(RangeError) end it "should convert to float if it contains only a single element" do expect(NMatrix[4.0, stype: stype].to_f).to eq 4.0 unless stype == :yale expect(NMatrix[[[[4.0]]], stype: stype].to_f).to eq 4.0 unless stype == :yale expect(NMatrix[[4.0], stype: stype].to_f).to eq 4.0 end it "should raise an index error if it contains more than a single element" do expect { nm_1d.to_f }.to raise_error(IndexError) unless stype == :yale expect { nm_2d.to_f }.to raise_error(IndexError) end it "should map a block to all elements" do expect(nm_1d.map { |e| e ** 2 }).to eq NMatrix[25.0,0.0,1.0,4.0,9.0, stype: stype] unless stype == :yale expect(nm_2d.map { |e| e ** 2 }).to eq NMatrix[[0.0,1.0],[4.0,9.0], stype: stype] end it "should map! 
a block to all elements in place" do fct = Proc.new { |e| e ** 2 } unless stype == :yale then expected1 = nm_1d.map(&fct) nm_1d.map!(&fct) expect(nm_1d).to eq expected1 end expected2 = nm_2d.map(&fct) nm_2d.map!(&fct) expect(nm_2d).to eq expected2 end it "should return an enumerator for map without a block" do expect(nm_2d.map).to be_a Enumerator end it "should return an enumerator for reduce without a block" do expect(nm_2d.reduce_along_dim(0)).to be_a Enumerator end it "should return an enumerator for each_along_dim without a block" do expect(nm_2d.each_along_dim(0)).to be_a Enumerator end it "should iterate correctly for map without a block" do en = nm_1d.map unless stype == :yale expect(en.each { |e| e**2 }).to eq nm_1d.map { |e| e**2 } unless stype == :yale en = nm_2d.map expect(en.each { |e| e**2 }).to eq nm_2d.map { |e| e**2 } end it "should iterate correctly for reduce without a block" do pending("not yet implemented for NMatrix-JRuby") if jruby? unless stype == :yale then en = nm_1d.reduce_along_dim(0, 1.0) expect(en.each { |a, e| a+e }.to_f).to eq 12 end en = nm_2d.reduce_along_dim(1, 1.0) expect(en.each { |a, e| a+e }).to eq NMatrix[[2.0],[6.0], stype: stype] end it "should iterate correctly for each_along_dim without a block" do unless stype == :yale then res = NMatrix.zeros_like(nm_1d[0...1]) en = nm_1d.each_along_dim(0) en.each { |e| res += e } expect(res.to_f).to eq 11 end res = NMatrix.zeros_like (nm_2d[0...2, 0]) en = nm_2d.each_along_dim(1) en.each { |e| res += e } expect(res).to eq NMatrix[[1.0], [5.0], stype: stype] end it "should yield matrices of matching dtype for each_along_dim" do m = NMatrix.new([2,3], [1,2,3,3,4,5], dtype: :complex128, stype: stype) m.each_along_dim(1) do |sub_m| expect(sub_m.dtype).to eq :complex128 end end it "should reduce to a matrix of matching dtype for reduce_along_dim" do m = NMatrix.new([2,3], [1,2,3,3,4,5], dtype: :complex128, stype: stype) m.reduce_along_dim(1) do |acc, sub_m| expect(sub_m.dtype).to eq 
:complex128 acc end m.reduce_along_dim(1, 0.0) do |acc, sub_m| expect(sub_m.dtype).to eq :complex128 acc end end it "should allow overriding the dtype for reduce_along_dim" do pending("not yet implemented for NMatrix-JRuby") if jruby? m = NMatrix[[1,2,3], [3,4,5], dtype: :complex128] m.reduce_along_dim(1, 0.0, :float64) do |acc, sub_m| expect(acc.dtype).to eq :float64 acc end m = NMatrix[[1,2,3], [3,4,5], dtype: :complex128, stype: stype] m.reduce_along_dim(1, nil, :float64) do |acc, sub_m| expect(acc.dtype).to eq :float64 acc end end it "should convert integer dtypes to float when calculating mean" do pending("not yet implemented for NMatrix-JRuby") if jruby? m = NMatrix[[1,2,3], [3,4,5], dtype: :int32, stype: stype] expect(m.mean(0).dtype).to eq :float64 end it "should convert integer dtypes to float when calculating variance" do pending("not yet implemented for NMatrix-JRuby") if jruby? m = NMatrix[[1,2,3], [3,4,5], dtype: :int32, stype: stype] expect(m.variance(0).dtype).to eq :float64 end it "should convert integer dtypes to float when calculating standard deviation" do pending("not yet implemented for NMatrix-JRuby") if jruby? 
m = NMatrix[[1,2,3], [3,4,5], dtype: :int32, stype: stype] expect(m.std(0).dtype).to eq :float64 end end end end end ================================================ FILE: spec/test.pcd ================================================ VERSION .7 FIELDS x y z intensity SIZE 4 8 8 4 TYPE F U F I COUNT 1 1 1 1 WIDTH 256 # comment here to challenge this thing HEIGHT 256 VIEWPOINT 0 0 0 1 0 0 0 POINTS 10 DATA ASCII 207.008 207.058 1174 0 207.008 205.441 1174 0 207.008 203.823 1174 0 207.008 202.206 1174 0 207.008 200.589 1174 0 207.008 198.972 1174 0 207.008 197.354 1174 0 207.008 195.737 1174 0 207.008 194.12 1174 0 207.008 153.689 1174 0 ================================================ FILE: travis.sh ================================================ #!/bin/bash set -ev #fail at the first command that returns non-zero exit value # Use rbenv on OSX iff ruby_version is given if [ -n "$ruby_version" -a "$TRAVIS_OS_NAME" = "osx" ]; then export PATH="$HOME/.rbenv/bin:$PATH" if [ -x $HOME/.rbenv/bin/rbenv ]; then eval "$(rbenv init -)" fi export RBENV_VERSION=$ruby_version unset GEM_PATH GEM_HOME fi if [ "$1" = "install" ] then bundle install --jobs=3 --retry=3 --path=${BUNDLE_PATH:-vendor/bundle} fi if [ "$1" = "before_install" ] then case "$TRAVIS_OS_NAME" in linux) sudo apt-get update -qq ;; osx) brew update >/dev/null ;; esac # Installing ruby by using rbenv on OSX iff ruby_version is given if [ -n "$ruby_version" -a "$TRAVIS_OS_NAME" = "osx" ]; then git clone https://github.com/rbenv/rbenv.git ~/.rbenv git clone https://github.com/rbenv/ruby-build.git ~/.rbenv/plugins/ruby-build eval "$(rbenv init -)" # Install ruby ( brew install bison openssl readline brew link --force openssl RBENV_VERSION=system MAKEOPTS='-j 4' CONFIGURE_OPTS="--disable-install-doc --with-out-ext=tk,tk/tkutil --with-opt-dir=/usr/local" rbenv install --verbose $ruby_version ) gem pristine --all gem update --no-document --system gem update --no-document fi gem install --no-document bundler -v '~> 1.6' 
if [ -n "$USE_ATLAS" ] then case "$TRAVIS_OS_NAME" in linux) sudo apt-get install -y libatlas-base-dev ;; osx) echo "FIXME: ATLAS on OSX environment is not supported, currently" >2 exit 1 ;; esac fi # travis-ci runs on Ubuntu 12.04, where the openblas package doesn't # provide a liblapack.so, so we test using the blas from openblas # and the reference lapack implementation. Not entirely sure if # this will work. if [ -n "$USE_OPENBLAS" ] then case "$TRAVIS_OS_NAME" in linux) sudo apt-get install -y libopenblas-dev # Since we install libopenblas first, liblapack won't try to install # libblas (the reference BLAS implementation). sudo apt-get install -y liblapack-dev ;; osx) brew install homebrew/science/openblas ;; esac fi if [ -n "$USE_REF" ] then case "$TRAVIS_OS_NAME" in linux) sudo apt-get install -y liblapack-dev ;; osx) brew install homebrew/dupes/lapack ;; esac fi fi if [ "$1" = "script" ] then nmatrix_plugins_opt='' if [ -n "$USE_ATLAS" ] then # Need to put these commands on separate lines (rather than use &&) # so that bash set -e will work. nmatrix_plugins_opt='nmatrix_plugins=atlas' fi if [ -n "$USE_OPENBLAS" ] then nmatrix_plugins_opt='nmatrix_plugins=lapacke' fi if [ -n "$USE_REF" ] then nmatrix_plugins_opt='nmatrix_plugins=lapacke' fi if [ -n "$NO_EXTERNAL_LIB" ] then nmatrix_plugins_opt='' fi bundle exec rake travis:env if [[ "$TRAVIS_RUBY_VERSION" =~ "jruby" ]];then bundle exec rake jruby bundle exec rake spec else bundle exec rake compile $nmatrix_plugins_opt || { echo === Contents of mkmf.log === cat tmp/*/nmatrix/*/mkmf.log exit 1 } bundle exec rake spec $nmatrix_plugins_opt fi fi